Index: include/llvm/CodeGen/MachineRegisterInfo.h
===================================================================
--- include/llvm/CodeGen/MachineRegisterInfo.h
+++ include/llvm/CodeGen/MachineRegisterInfo.h
@@ -84,14 +84,15 @@
   /// all registers that were disabled are removed from the list.
   SmallVector<MCPhysReg, 16> UpdatedCSRs;
 
-  /// RegAllocHints - This vector records register allocation hints for virtual
-  /// registers. For each virtual register, it keeps a register and hint type
-  /// pair making up the allocation hint. Hint type is target specific except
-  /// for the value 0 which means the second value of the pair is the preferred
-  /// register for allocation. For example, if the hint is <0, 1024>, it means
-  /// the allocator should prefer the physical register allocated to the virtual
-  /// register of the hint.
-  IndexedMap<std::pair<unsigned, unsigned>, VirtReg2IndexFunctor> RegAllocHints;
+  /// RegAllocHints - This vector records register allocation hints for
+  /// virtual registers. For each virtual register, it keeps a pair of a
+  /// hint type and a vector of hinted registers, together making up the
+  /// allocation hints. Only the first hint may be target specific; in that
+  /// case the first member of the pair is non-zero. If a hinted register
+  /// is virtual, the allocator should prefer the physical register
+  /// allocated to it, if any.
+  IndexedMap<std::pair<unsigned, SmallVector<unsigned, 4>>,
+             VirtReg2IndexFunctor> RegAllocHints;
 
   /// PhysRegUseDefLists - This is an array of the head of the use/def list for
   /// physical registers.
@@ -702,35 +703,55 @@
   void clearVirtRegs();
 
   /// setRegAllocationHint - Specify a register allocation hint for the
-  /// specified virtual register.
+  /// specified virtual register. This is typically used by the target, and
+  /// any earlier hint is overwritten.
   void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) {
     assert(TargetRegisterInfo::isVirtualRegister(VReg));
     RegAllocHints[VReg].first = Type;
-    RegAllocHints[VReg].second = PrefReg;
+    RegAllocHints[VReg].second.clear();
+    RegAllocHints[VReg].second.push_back(PrefReg);
   }
 
-  /// Specify the preferred register allocation hint for the specified virtual
-  /// register.
+  /// addRegAllocationHint - Add a register allocation hint to the hints
+  /// vector for VReg.
+  void addRegAllocationHint(unsigned VReg, unsigned PrefReg) {
+    assert(TargetRegisterInfo::isVirtualRegister(VReg));
+    RegAllocHints[VReg].second.push_back(PrefReg);
+  }
+
+  /// Specify the preferred (target independent) register allocation hint for
+  /// the specified virtual register.
   void setSimpleHint(unsigned VReg, unsigned PrefReg) {
     setRegAllocationHint(VReg, /*Type=*/0, PrefReg);
   }
 
   /// getRegAllocationHint - Return the register allocation hint for the
-  /// specified virtual register.
+  /// specified virtual register. If there are several hints, this returns
+  /// the one with the greatest weight.
   std::pair<unsigned, unsigned> getRegAllocationHint(unsigned VReg) const {
     assert(TargetRegisterInfo::isVirtualRegister(VReg));
-    return RegAllocHints[VReg];
+    unsigned BestHint = (RegAllocHints[VReg].second.size() ?
+                         RegAllocHints[VReg].second[0] : 0);
+    return std::pair<unsigned, unsigned>(RegAllocHints[VReg].first, BestHint);
   }
 
-  /// getSimpleHint - Return the preferred register allocation hint, or 0 if a
-  /// standard simple hint (Type == 0) is not set.
+  /// getSimpleHint - Same as getRegAllocationHint(), except that it only
+  /// returns a target independent hint.
   unsigned getSimpleHint(unsigned VReg) const {
     assert(TargetRegisterInfo::isVirtualRegister(VReg));
     std::pair<unsigned, unsigned> Hint = getRegAllocationHint(VReg);
     return Hint.first ? 0 : Hint.second;
   }
 
+  /// getRegAllocationHints - Return a reference to the pair of the hint
+  /// type and the vector of all register allocation hints for VReg.
+  const std::pair<unsigned, SmallVector<unsigned, 4>>
+  &getRegAllocationHints(unsigned VReg) const {
+    assert(TargetRegisterInfo::isVirtualRegister(VReg));
+    return RegAllocHints[VReg];
+  }
+
   /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
   /// specified register as undefined which causes the DBG_VALUE to be
   /// deleted during LiveDebugVariables analysis.
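To make the new MachineRegisterInfo interface above concrete, here is a minimal standalone sketch, not part of the patch: the helper name and the assumption that the caller already has its candidate registers sorted by decreasing preference are made up for illustration.

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include <cassert>
    using namespace llvm;

    // Hypothetical helper: record copy-derived preferences for VReg,
    // strongest first, using the interface added by this patch.
    static void recordCopyHints(MachineRegisterInfo &MRI, unsigned VReg,
                                ArrayRef<unsigned> RegsByWeight) {
      if (RegsByWeight.empty())
        return;
      // The first hint is a plain preference (Type == 0); setSimpleHint()
      // goes through setRegAllocationHint(), so it clears any earlier hints.
      MRI.setSimpleHint(VReg, RegsByWeight.front());
      // Further hints are appended in decreasing order of preference.
      for (unsigned Reg : RegsByWeight.drop_front())
        MRI.addRegAllocationHint(VReg, Reg);
      // getRegAllocationHint() still returns only the strongest (first) hint;
      // getRegAllocationHints() exposes the whole vector.
      std::pair<unsigned, unsigned> Best = MRI.getRegAllocationHint(VReg);
      assert(Best.second == RegsByWeight.front() && "first hint is preferred");
      (void)Best;
    }

Because addRegAllocationHint() only appends, the strongest hint has to be recorded first; that is exactly what the CalcSpillWeights change below arranges via its sorted set.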
Index: include/llvm/Target/TargetRegisterInfo.h
===================================================================
--- include/llvm/Target/TargetRegisterInfo.h
+++ include/llvm/Target/TargetRegisterInfo.h
@@ -785,11 +785,10 @@
   /// as returned from RegisterClassInfo::getOrder(). The hint registers must
   /// come from Order, and they must not be reserved.
   ///
-  /// The default implementation of this function can resolve
-  /// target-independent hints provided to MRI::setRegAllocationHint with
-  /// HintType == 0. Targets that override this function should defer to the
-  /// default implementation if they have no reason to change the allocation
-  /// order for VirtReg. There may be target-independent hints.
+  /// The default implementation of this function only adds
+  /// target-independent register allocation hints. Targets that override
+  /// this function should typically call this default implementation as
+  /// well and expect to see generic copy hints added.
   virtual bool getRegAllocationHints(unsigned VirtReg,
                                      ArrayRef<MCPhysReg> Order,
                                      SmallVectorImpl<MCPhysReg> &Hints,
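The comment above describes the intended override pattern; the sketch below shows what that typically looks like in a target's TargetRegisterInfo subclass. MyTargetRegisterInfo is a made-up class assumed to declare this override in its own header, and the trailing parameters are taken from the existing hook declaration, so treat this as an assumption-laden example rather than required boilerplate.

    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    bool MyTargetRegisterInfo::getRegAllocationHints(
        unsigned VirtReg, ArrayRef<MCPhysReg> Order,
        SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
        const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
      // Let the generic implementation translate the copy hints recorded in
      // MachineRegisterInfo into allocatable physical registers from Order.
      TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
                                                VRM, Matrix);
      // ...append or reorder target-specific preferences in Hints here...
      // Returning false keeps the rest of the allocation order usable.
      return false;
    }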
Index: lib/CodeGen/CalcSpillWeights.cpp
===================================================================
--- lib/CodeGen/CalcSpillWeights.cpp
+++ lib/CodeGen/CalcSpillWeights.cpp
@@ -69,14 +69,16 @@
   if (TargetRegisterInfo::isVirtualRegister(hreg))
     return sub == hsub ? hreg : 0;
 
+  unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
   const TargetRegisterClass *rc = mri.getRegClass(reg);
+  if (rc->contains(CopiedPReg))
+    return CopiedPReg;
 
-  // Only allow physreg hints in rc.
-  if (sub == 0)
-    return rc->contains(hreg) ? hreg : 0;
+  // Check if reg:sub matches so that a super-register could be hinted.
+  if (sub)
+    return tri.getMatchingSuperReg(CopiedPReg, sub, rc);
 
-  // reg:sub should match the physreg hreg.
-  return tri.getMatchingSuperReg(hreg, sub, rc);
+  return 0;
 }
 
 // Check if all values in LI are rematerializable
@@ -157,13 +159,6 @@
   unsigned numInstr = 0; // Number of instructions using li
   SmallPtrSet<MachineInstr*, 8> visited;
 
-  // Find the best physreg hint and the best virtreg hint.
-  float bestPhys = 0, bestVirt = 0;
-  unsigned hintPhys = 0, hintVirt = 0;
-
-  // Don't recompute a target specific hint.
-  bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
-
   // Don't recompute spill weight for an unspillable register.
   bool Spillable = li.isSpillable();
 
@@ -188,6 +183,24 @@
       numInstr += 2;
     }
 
+  // CopyHint is a sortable hint derived from a COPY instruction.
+  struct CopyHint {
+    unsigned Reg;
+    float Weight;
+    bool IsPhys;
+    CopyHint(unsigned R, float W, bool P) : Reg(R), Weight(W), IsPhys(P) {}
+    bool operator<(const CopyHint &rhs) const {
+      // Always prefer any physreg hint.
+      if (IsPhys != rhs.IsPhys)
+        return (IsPhys && !rhs.IsPhys);
+      if (Weight != rhs.Weight)
+        return (Weight > rhs.Weight);
+      // Break ties on the register number just to keep the set ordering
+      // strict.
+      return Reg < rhs.Reg;
+    }
+  };
+  std::set<CopyHint> CopyHints;
+
   for (MachineRegisterInfo::reg_instr_iterator
        I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
        I != E; ) {
@@ -227,7 +240,7 @@
     }
 
     // Get allocation hints from copies.
-    if (noHint || !mi->isCopy())
+    if (!mi->isCopy())
      continue;
    unsigned hint = copyHint(mi, li.reg, tri, mri);
    if (!hint)
@@ -237,28 +250,24 @@
    //
    // FIXME: we probably shouldn't use floats at all.
    volatile float hweight = Hint[hint] += weight;
-    if (TargetRegisterInfo::isPhysicalRegister(hint)) {
-      if (hweight > bestPhys && mri.isAllocatable(hint)) {
-        bestPhys = hweight;
-        hintPhys = hint;
-      }
-    } else {
-      if (hweight > bestVirt) {
-        bestVirt = hweight;
-        hintVirt = hint;
-      }
-    }
+    CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint)));
  }
 
  Hint.clear();
 
-  // Always prefer the physreg hint.
+  // Pass all the sorted copy hints to mri.
  if (updateLI) {
-    if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
-      mri.setRegAllocationHint(li.reg, 0, hint);
+    std::pair<unsigned, unsigned> TargetHint = mri.getRegAllocationHint(li.reg);
+    for (auto &Hint : CopyHints) {
+      if (Hint.Reg == TargetHint.second)
+        // Don't add the target-specific hint again.
+        continue;
+      mri.addRegAllocationHint(li.reg, Hint.Reg);
+    }
+
+    if (CopyHints.size())
      // Weakly boost the spill weight of hinted registers.
      totalWeight *= 1.01F;
-    }
  }
 
  // If the live interval was already unspillable, leave it that way.
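The behaviour of the CopyHints set above hinges on CopyHint's operator<: physreg hints sort ahead of virtreg hints, heavier (more frequently copied) registers ahead of lighter ones, and the register number only breaks ties so the set keeps a strict weak ordering. The small standalone program below, which is not LLVM code and uses invented register numbers and weights, demonstrates the resulting iteration order.

    #include <cassert>
    #include <set>

    // Same comparator shape as CopyHint in CalcSpillWeights.cpp.
    struct Hint {
      unsigned Reg;
      float Weight;
      bool IsPhys;
      bool operator<(const Hint &RHS) const {
        if (IsPhys != RHS.IsPhys)
          return IsPhys && !RHS.IsPhys; // Physreg hints always come first.
        if (Weight != RHS.Weight)
          return Weight > RHS.Weight;   // Then by decreasing copy weight.
        return Reg < RHS.Reg;           // Tie-breaker for the std::set only.
      }
    };

    int main() {
      std::set<Hint> Hints{{5, 1.0f, false}, {3, 2.0f, true}, {7, 4.0f, false}};
      // Iteration order: reg 3 (phys), then reg 7 (virt, 4.0), then reg 5 (1.0).
      assert(Hints.begin()->Reg == 3);
      return 0;
    }

This is the order in which weightCalcHelper() hands the hints to MachineRegisterInfo, which in turn is what lets getRegAllocationHints() present the most profitable registers to the allocator first.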
Index: lib/CodeGen/TargetRegisterInfo.cpp
===================================================================
--- lib/CodeGen/TargetRegisterInfo.cpp
+++ lib/CodeGen/TargetRegisterInfo.cpp
@@ -368,31 +368,36 @@
                                const VirtRegMap *VRM,
                                const LiveRegMatrix *Matrix) const {
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
-
-  // Hints with HintType != 0 were set by target-dependent code.
-  // Such targets must provide their own implementation of
-  // TRI::getRegAllocationHints to interpret those hint types.
-  assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
-
-  // Target-independent hints are either a physical or a virtual register.
-  unsigned Phys = Hint.second;
-  if (VRM && isVirtualRegister(Phys))
-    Phys = VRM->getPhys(Phys);
-
-  // Check that Phys is a valid hint in VirtReg's register class.
-  if (!isPhysicalRegister(Phys))
-    return false;
-  if (MRI.isReserved(Phys))
-    return false;
-  // Check that Phys is in the allocation order. We shouldn't heed hints
-  // from VirtReg's register class if they aren't in the allocation order. The
-  // target probably has a reason for removing the register.
-  if (!is_contained(Order, Phys))
-    return false;
-
-  // All clear, tell the register allocator to prefer this register.
-  Hints.push_back(Phys);
+  const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI =
+    MRI.getRegAllocationHints(VirtReg);
+
+  // The first hint may be a target-specific hint.
+  bool Skip = (Hints_MRI.first != 0);
+  for (auto Reg : Hints_MRI.second) {
+    if (Skip) {
+      Skip = false;
+      continue;
+    }
+
+    // Target-independent hints are either a physical or a virtual register.
+    unsigned Phys = Reg;
+    if (VRM && isVirtualRegister(Phys))
+      Phys = VRM->getPhys(Phys);
+
+    // Check that Phys is a valid hint in VirtReg's register class.
+    if (!isPhysicalRegister(Phys))
+      continue;
+    if (MRI.isReserved(Phys))
+      continue;
+    // Check that Phys is in the allocation order. We shouldn't heed hints
+    // from VirtReg's register class if they aren't in the allocation order.
+    // The target probably has a reason for removing the register.
+    if (!is_contained(Order, Phys))
+      continue;
+
+    // All clear, tell the register allocator to prefer this register.
+    Hints.push_back(Phys);
+  }
 
   return false;
 }
Index: test/CodeGen/AArch64/arm64-aapcs.ll
===================================================================
--- test/CodeGen/AArch64/arm64-aapcs.ll
+++ test/CodeGen/AArch64/arm64-aapcs.ll
@@ -5,20 +5,20 @@
 ; CHECK-LABEL: @test_i128_align
 define i128 @test_i128_align(i32, i128 %arg, i32 %after) {
   store i32 %after, i32* @var, align 4
-; CHECK: str w4, [{{x[0-9]+}}, :lo12:var]
+; CHECK-DAG: str w4, [{{x[0-9]+}}, :lo12:var]
 
   ret i128 %arg
-; CHECK: mov x0, x2
-; CHECK: mov x1, x3
+; CHECK-DAG: mov x0, x2
+; CHECK-DAG: mov x1, x3
 }
 
 ; CHECK-LABEL: @test_i64x2_align
 define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) {
   store i32 %after, i32* @var, align 4
-; CHECK: str w3, [{{x[0-9]+}}, :lo12:var]
+; CHECK-DAG: str w3, [{{x[0-9]+}}, :lo12:var]
 
   ret [2 x i64] %arg
-; CHECK: mov x0, x1
+; CHECK-DAG: mov x0, x1
 ; CHECK: mov x1, x2
 }
Index: test/CodeGen/AArch64/func-argpassing.ll
===================================================================
--- test/CodeGen/AArch64/func-argpassing.ll
+++ test/CodeGen/AArch64/func-argpassing.ll
@@ -164,11 +164,11 @@
 define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) {
 ; CHECK-LABEL: check_i128_regalign
     store i128 %val1, i128* @var128
-; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK-DAG: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
 ; CHECK-DAG: stp x2, x3, [x[[VAR128]]]
 
     ret i64 %val2
-; CHECK: mov x0, x4
+; CHECK-DAG: mov x0, x4
 }
 
 define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
Index: test/CodeGen/AArch64/swifterror.ll
===================================================================
--- test/CodeGen/AArch64/swifterror.ll
+++ test/CodeGen/AArch64/swifterror.ll
@@ -40,11 +40,11 @@
 ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
 ; CHECK-APPLE: mov x21, xzr
 ; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE: cbnz x21
+; CHECK-APPLE: mov x0, x21
+; CHECK-APPLE: cbnz x0
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x21
 ; CHECK-APPLE: bl {{.*}}free
 
 ; CHECK-O0-LABEL: caller:
@@ -263,11 +263,11 @@
 ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
 ; CHECK-APPLE: mov x21, xzr
 ; CHECK-APPLE: bl {{.*}}foo_sret
-; CHECK-APPLE: cbnz x21
+; CHECK-APPLE: mov x0, x21
+; CHECK-APPLE: cbnz x0
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x21
 ; CHECK-APPLE: bl {{.*}}free
 
 ; CHECK-O0-LABEL: caller3:
@@ -358,11 +358,11 @@
 ; CHECK-APPLE: mov x21, xzr
 ; CHECK-APPLE: bl {{.*}}foo_vararg
-; CHECK-APPLE: cbnz x21
+; CHECK-APPLE: mov x0, x21
+; CHECK-APPLE: cbnz x0
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x21
 ; CHECK-APPLE: bl {{.*}}free
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
Index: test/CodeGen/AArch64/win64_vararg.ll
=================================================================== --- test/CodeGen/AArch64/win64_vararg.ll +++ test/CodeGen/AArch64/win64_vararg.ll @@ -161,25 +161,25 @@ ; CHECK: add x8, x8, #15 ; CHECK: mov x9, sp ; CHECK: and x8, x8, #0x1fffffff0 -; CHECK: sub x20, x9, x8 +; CHECK: sub [[REG:x[0-9]+]], x9, x8 ; CHECK: mov x19, x1 -; CHECK: mov x23, sp +; CHECK: mov [[REG2:x[0-9]+]], sp ; CHECK: stp x6, x7, [x29, #48] ; CHECK: stp x4, x5, [x29, #32] ; CHECK: stp x2, x3, [x29, #16] -; CHECK: mov sp, x20 -; CHECK: ldur x21, [x29, #-40] -; CHECK: sxtw x22, w0 +; CHECK: mov sp, [[REG]] +; CHECK: ldur [[REG3:x[0-9]+]], [x29, #-40] +; CHECK: sxtw [[REG4:x[0-9]+]], w0 ; CHECK: bl __local_stdio_printf_options ; CHECK: ldr x8, [x0] -; CHECK: mov x1, x20 -; CHECK: mov x2, x22 +; CHECK: mov x1, [[REG]] +; CHECK: mov x2, [[REG4]] ; CHECK: mov x3, x19 ; CHECK: orr x0, x8, #0x2 ; CHECK: mov x4, xzr -; CHECK: mov x5, x21 +; CHECK: mov x5, [[REG3]] ; CHECK: bl __stdio_common_vsprintf -; CHECK: mov sp, x23 +; CHECK: mov sp, [[REG2]] ; CHECK: sub sp, x29, #48 ; CHECK: ldp x29, x30, [sp, #48] ; CHECK: ldp x20, x19, [sp, #32] @@ -255,17 +255,15 @@ ; CHECK-LABEL: fixed_params ; CHECK: sub sp, sp, #32 -; CHECK: mov w8, w3 -; CHECK: mov w9, w2 -; CHECK: mov w10, w1 +; CHECK-DAG: mov w6, w3 +; CHECK-DAG: mov [[REG1:w[0-9]+]], w2 +; CHECK: mov w2, w1 ; CHECK: str w4, [sp] ; CHECK: fmov x1, d0 ; CHECK: fmov x3, d1 ; CHECK: fmov x5, d2 ; CHECK: fmov x7, d3 -; CHECK: mov w2, w10 -; CHECK: mov w4, w9 -; CHECK: mov w6, w8 +; CHECK: mov w4, [[REG1]] ; CHECK: str x30, [sp, #16] ; CHECK: str d4, [sp, #8] ; CHECK: bl varargs Index: test/CodeGen/AMDGPU/callee-special-input-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -208,8 +208,8 @@ ; GCN: enable_sgpr_workgroup_id_z = 0 ; GCN: s_mov_b32 s33, s8 -; GCN: s_mov_b32 s4, s33 -; GCN: s_mov_b32 s6, s7 +; GCN-DAG: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_mov_b32 s32, s33 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 { @@ -223,8 +223,8 @@ ; GCN: enable_sgpr_workgroup_id_z = 1 ; GCN: s_mov_b32 s33, s8 -; GCN: s_mov_b32 s4, s33 -; GCN: s_mov_b32 s6, s7 +; GCN-DAG: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 { call void @use_workgroup_id_z() @@ -396,7 +396,7 @@ ; GCN-DAG: s_mov_b32 s33, s8 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s6, s7 ; GCN-DAG: s_mov_b32 s32, s33 ; GCN: s_swappc_b64 @@ -412,7 +412,7 @@ ; GCN: s_mov_b32 s33, s8 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_mov_b32 s32, s33 Index: test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -220,8 +220,8 @@ ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z: ; GCN: enable_vgpr_workitem_id = 2 -; GCN: v_mov_b32_e32 v0, 0x22b -; GCN: v_mov_b32_e32 v1, v2 +; GCN-DAG: v_mov_b32_e32 v0, 0x22b +; GCN-DAG: v_mov_b32_e32 v1, v2 ; GCN: s_swappc_b64 ; GCN-NOT: v0 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { Index: test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll 
=================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll @@ -41,7 +41,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_oeq: -; GCN: v_cmp_eq_f32_e64 +; GCN: v_cmp_eq_f32_e32 define amdgpu_kernel void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1) store i64 %result, i64 addrspace(1)* %out @@ -49,7 +49,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_one: -; GCN: v_cmp_neq_f32_e64 +; GCN: v_cmp_neq_f32_e32 define amdgpu_kernel void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6) store i64 %result, i64 addrspace(1)* %out @@ -57,7 +57,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ogt: -; GCN: v_cmp_gt_f32_e64 +; GCN: v_cmp_gt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2) store i64 %result, i64 addrspace(1)* %out @@ -65,7 +65,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_oge: -; GCN: v_cmp_ge_f32_e64 +; GCN: v_cmp_ge_f32_e32 define amdgpu_kernel void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3) store i64 %result, i64 addrspace(1)* %out @@ -73,7 +73,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_olt: -; GCN: v_cmp_lt_f32_e64 +; GCN: v_cmp_lt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4) store i64 %result, i64 addrspace(1)* %out @@ -81,7 +81,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ole: -; GCN: v_cmp_le_f32_e64 +; GCN: v_cmp_le_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5) store i64 %result, i64 addrspace(1)* %out @@ -90,7 +90,7 @@ ; GCN-LABEL: {{^}}v_fcmp_f32_ueq: -; GCN: v_cmp_nlg_f32_e64 +; GCN: v_cmp_nlg_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9) store i64 %result, i64 addrspace(1)* %out @@ -98,7 +98,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_une: -; GCN: v_cmp_neq_f32_e64 +; GCN: v_cmp_neq_f32_e32 define amdgpu_kernel void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14) store i64 %result, i64 addrspace(1)* %out @@ -106,7 +106,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ugt: -; GCN: v_cmp_nle_f32_e64 +; GCN: v_cmp_nle_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10) store i64 %result, i64 addrspace(1)* %out @@ -114,7 +114,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_uge: -; GCN: v_cmp_nlt_f32_e64 +; GCN: v_cmp_nlt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11) store i64 %result, i64 addrspace(1)* %out @@ -122,7 +122,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ult: -; GCN: v_cmp_nge_f32_e64 +; GCN: v_cmp_nge_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12) store i64 %result, i64 addrspace(1)* %out @@ -130,7 +130,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ule: -; GCN: 
v_cmp_ngt_f32_e64 +; GCN: v_cmp_ngt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13) store i64 %result, i64 addrspace(1)* %out @@ -138,7 +138,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_oeq: -; GCN: v_cmp_eq_f64_e64 +; GCN: v_cmp_eq_f64_e32 define amdgpu_kernel void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1) store i64 %result, i64 addrspace(1)* %out @@ -146,7 +146,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_one: -; GCN: v_cmp_neq_f64_e64 +; GCN: v_cmp_neq_f64_e32 define amdgpu_kernel void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6) store i64 %result, i64 addrspace(1)* %out @@ -154,7 +154,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ogt: -; GCN: v_cmp_gt_f64_e64 +; GCN: v_cmp_gt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2) store i64 %result, i64 addrspace(1)* %out @@ -162,7 +162,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_oge: -; GCN: v_cmp_ge_f64_e64 +; GCN: v_cmp_ge_f64_e32 define amdgpu_kernel void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3) store i64 %result, i64 addrspace(1)* %out @@ -170,7 +170,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_olt: -; GCN: v_cmp_lt_f64_e64 +; GCN: v_cmp_lt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4) store i64 %result, i64 addrspace(1)* %out @@ -178,7 +178,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ole: -; GCN: v_cmp_le_f64_e64 +; GCN: v_cmp_le_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5) store i64 %result, i64 addrspace(1)* %out @@ -186,7 +186,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ueq: -; GCN: v_cmp_nlg_f64_e64 +; GCN: v_cmp_nlg_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9) store i64 %result, i64 addrspace(1)* %out @@ -194,7 +194,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_une: -; GCN: v_cmp_neq_f64_e64 +; GCN: v_cmp_neq_f64_e32 define amdgpu_kernel void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14) store i64 %result, i64 addrspace(1)* %out @@ -202,7 +202,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ugt: -; GCN: v_cmp_nle_f64_e64 +; GCN: v_cmp_nle_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10) store i64 %result, i64 addrspace(1)* %out @@ -210,7 +210,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_uge: -; GCN: v_cmp_nlt_f64_e64 +; GCN: v_cmp_nlt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11) store i64 %result, i64 addrspace(1)* %out @@ -218,7 +218,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ult: -; GCN: v_cmp_nge_f64_e64 +; GCN: v_cmp_nge_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 
100.00, i32 12) store i64 %result, i64 addrspace(1)* %out @@ -226,7 +226,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ule: -; GCN: v_cmp_ngt_f64_e64 +; GCN: v_cmp_ngt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13) store i64 %result, i64 addrspace(1)* %out Index: test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll @@ -14,7 +14,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_eq: -; GCN: v_cmp_eq_u32_e64 +; GCN: v_cmp_eq_u32_e32 define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32) store i64 %result, i64 addrspace(1)* %out @@ -29,7 +29,7 @@ ret void } ; GCN-LABEL: {{^}}v_icmp_i32_ne: -; GCN: v_cmp_ne_u32_e64 +; GCN: v_cmp_ne_u32_e32 define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33) store i64 %result, i64 addrspace(1)* %out @@ -37,7 +37,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_ugt: -; GCN: v_cmp_gt_u32_e64 +; GCN: v_cmp_gt_u32_e32 define amdgpu_kernel void @v_icmp_u32_ugt(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34) store i64 %result, i64 addrspace(1)* %out @@ -45,7 +45,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_uge: -; GCN: v_cmp_ge_u32_e64 +; GCN: v_cmp_ge_u32_e32 define amdgpu_kernel void @v_icmp_u32_uge(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35) store i64 %result, i64 addrspace(1)* %out @@ -53,7 +53,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_ult: -; GCN: v_cmp_lt_u32_e64 +; GCN: v_cmp_lt_u32_e32 define amdgpu_kernel void @v_icmp_u32_ult(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36) store i64 %result, i64 addrspace(1)* %out @@ -61,7 +61,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_ule: -; GCN: v_cmp_le_u32_e64 +; GCN: v_cmp_le_u32_e32 define amdgpu_kernel void @v_icmp_u32_ule(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37) store i64 %result, i64 addrspace(1)* %out @@ -69,7 +69,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_sgt: -; GCN: v_cmp_gt_i32_e64 +; GCN: v_cmp_gt_i32_e32 define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38) store i64 %result, i64 addrspace(1)* %out @@ -77,7 +77,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_sge: -; GCN: v_cmp_ge_i32_e64 +; GCN: v_cmp_ge_i32_e32 define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39) store i64 %result, i64 addrspace(1)* %out @@ -85,14 +85,14 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_slt: -; GCN: v_cmp_lt_i32_e64 +; GCN: v_cmp_lt_i32_e32 define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40) store i64 %result, i64 addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}v_icmp_i32_sle: -; GCN: v_cmp_le_i32_e64 +; GCN: v_cmp_le_i32_e32 define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41) store i64 %result, i64 addrspace(1)* %out @@ -100,7 +100,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_eq: -; GCN: 
v_cmp_eq_u64_e64 +; GCN: v_cmp_eq_u64_e32 define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32) store i64 %result, i64 addrspace(1)* %out @@ -108,7 +108,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_ne: -; GCN: v_cmp_ne_u64_e64 +; GCN: v_cmp_ne_u64_e32 define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33) store i64 %result, i64 addrspace(1)* %out @@ -116,7 +116,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_ugt: -; GCN: v_cmp_gt_u64_e64 +; GCN: v_cmp_gt_u64_e32 define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34) store i64 %result, i64 addrspace(1)* %out @@ -124,7 +124,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_uge: -; GCN: v_cmp_ge_u64_e64 +; GCN: v_cmp_ge_u64_e32 define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35) store i64 %result, i64 addrspace(1)* %out @@ -132,7 +132,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_ult: -; GCN: v_cmp_lt_u64_e64 +; GCN: v_cmp_lt_u64_e32 define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36) store i64 %result, i64 addrspace(1)* %out @@ -140,7 +140,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_ule: -; GCN: v_cmp_le_u64_e64 +; GCN: v_cmp_le_u64_e32 define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37) store i64 %result, i64 addrspace(1)* %out @@ -148,7 +148,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_sgt: -; GCN: v_cmp_gt_i64_e64 +; GCN: v_cmp_gt_i64_e32 define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38) store i64 %result, i64 addrspace(1)* %out @@ -156,7 +156,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_sge: -; GCN: v_cmp_ge_i64_e64 +; GCN: v_cmp_ge_i64_e32 define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39) store i64 %result, i64 addrspace(1)* %out @@ -164,14 +164,14 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_slt: -; GCN: v_cmp_lt_i64_e64 +; GCN: v_cmp_lt_i64_e32 define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40) store i64 %result, i64 addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}v_icmp_i64_sle: -; GCN: v_cmp_le_i64_e64 +; GCN: v_cmp_le_i64_e32 define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41) store i64 %result, i64 addrspace(1)* %out Index: test/CodeGen/AMDGPU/multilevel-break.ll =================================================================== --- test/CodeGen/AMDGPU/multilevel-break.ll +++ test/CodeGen/AMDGPU/multilevel-break.ll @@ -23,7 +23,8 @@ ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}} ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}} -; GCN: s_and_saveexec_b64 [[SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], vcc +; GCN: s_and_saveexec_b64 [[SAVE_BREAK_PREXOR:s\[[0-9]+:[0-9]+\]]], vcc +; GCN-NEXT: s_xor_b64 [[SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_BREAK_PREXOR]] ; GCN: BB{{[0-9]+}}_{{[0-9]+}}: ; %Flow{{$}} ; GCN-NEXT: ; in Loop: Header=[[INNER_LOOP]] Depth=2 Index: test/CodeGen/AMDGPU/ret.ll 
=================================================================== --- test/CodeGen/AMDGPU/ret.ll +++ test/CodeGen/AMDGPU/ret.ll @@ -126,9 +126,9 @@ ; GCN-LABEL: {{^}}vgpr_ps_addr119: ; GCN-DAG: v_mov_b32_e32 v0, v2 ; GCN-DAG: v_mov_b32_e32 v1, v3 -; GCN: v_mov_b32_e32 v2, v6 -; GCN: v_mov_b32_e32 v3, v8 -; GCN: v_mov_b32_e32 v4, v12 +; GCN-DAG: v_mov_b32_e32 v2, v6 +; GCN-DAG: v_mov_b32_e32 v3, v8 +; GCN-DAG: v_mov_b32_e32 v4, v12 ; GCN-NOT: s_endpgm define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 { bb: @@ -178,8 +178,8 @@ } ; GCN-LABEL: {{^}}sgpr: -; GCN: s_add_i32 s0, s3, 2 ; GCN: s_mov_b32 s2, s3 +; GCN: s_add_i32 s0, s2, 2 ; GCN-NOT: s_endpgm define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: Index: test/CodeGen/AMDGPU/sgpr-control-flow.ll =================================================================== --- test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -76,9 +76,11 @@ ; different threads will take different control flow paths. ; SI-LABEL: {{^}}sgpr_if_else_valu_br: -; SI: s_add_i32 [[SGPR:s[0-9]+]] -; SI-NOT: s_add_i32 [[SGPR]] - +; SI: s_add_i32 [[SGPR0:s[0-9]+]] +; SI: v_mov_b32_e32 [[VGPR:v[0-9]+]], [[SGPR0]] +; SI: s_add_i32 [[SGPR1:s[0-9]+]] +; SI: v_mov_b32_e32 [[VGPR:v[0-9]+]], [[SGPR1]] +; SI: buffer_store_dword [[VGPR]] define amdgpu_kernel void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 Index: test/CodeGen/ARM/longMAC.ll =================================================================== --- test/CodeGen/ARM/longMAC.ll +++ test/CodeGen/ARM/longMAC.ll @@ -337,20 +337,20 @@ @global_b = external global i16, align 2 ;CHECK-LABEL: MACLongTest15 ;CHECK-T2-DSP-NOT: {{asr|lsr}} -;CHECK-T2-DSP: smlaltb r2, r3, r0, r1 +;CHECK-T2-DSP: mov r1, r3 +;CHECK-T2-DSP: smlaltb r2, r1, r0, r3 ;CHECK-T2-DSP-NEXT: mov r0, r2 -;CHECK-T2-DSP-NEXT: mov r1, r3 ;CHECK-V5TE-NOT: {{asr|lsr}} -;CHECK-V5TE: smlaltb r2, r3, r0, r1 +;CHECK-V5TE: mov r1, r3 +;CHECK-V5TE: smlaltb r2, r1, r0, r3 ;CHECK-V5TE-NEXT: mov r0, r2 -;CHECK-V5TE-NEXT: mov r1, r3 ;CHECK-V7-LE-NOT: {{asr|lsr}} -;CHECK-V7-LE: smlaltb r2, r3, r0, r1 +;CHECK-V7-LE: mov r1, r3 +;CHECK-V7-LE: smlaltb r2, r1, r0, r3 ;CHECK-V7-LE-NEXT: mov r0, r2 -;CHECK-V7-LE-NEXT: mov r1, r3 -;CHECK-V7-THUMB-BE: smlaltb r3, r2, r0, r1 +;CHECK-V7-THUMB-BE: mov r1, r3 +;CHECK-V7-THUMB-BE: smlaltb r1, r2, r0, r3 ;CHECK-V7-THUMB-BE-NEXT: mov r0, r2 -;CHECK-V7-THUMB-BE-NEXT: mov r1, r3 ;CHECK-LE-NOT: smlaltb ;CHECK-BE-NOT: smlaltb ;CHECK-V6M-THUMB-NOT: smlaltb @@ -368,19 +368,19 @@ ;CHECK-LABEL: MACLongTest16 ;CHECK-T2-DSP-NOT: {{asr|lsr}} -;CHECK-T2-DSP: smlalbt r2, r3, r1, r0 +;CHECK-T2-DSP: mov r1, r3 +;CHECK-T2-DSP: smlalbt r2, r1, r3, r0 ;CHECK-T2-DSP-NEXT: mov r0, r2 -;CHECK-T2-DSP-NEXT: mov r1, r3 ;CHECK-V5TE-NOT: {{asr|lsr}} -;CHECK-V5TE: smlalbt r2, r3, r1, r0 +;CHECK-V5TE: mov r1, r3 +;CHECK-V5TE: smlalbt r2, r1, r3, r0 ;CHECK-V5TE-NEXT: mov r0, r2 -;CHECK-V5TE-NEXT: mov r1, r3 -;CHECK-V7-LE: smlalbt r2, r3, r1, r0 +;CHECK-V7-LE: mov r1, r3 +;CHECK-V7-LE: smlalbt r2, r1, r3, r0 
;CHECK-V7-LE-NEXT: mov r0, r2 -;CHECK-V7-LE-NEXT: mov r1, r3 -;CHECK-V7-THUMB-BE: smlalbt r3, r2, r1, r0 +;CHECK-V7-THUMB-BE: mov r1, r3 +;CHECK-V7-THUMB-BE: smlalbt r1, r2, r3, r0 ;CHECK-V7-THUMB-BE-NEXT: mov r0, r2 -;CHECK-V7-THUMB-BE-NEXT: mov r1, r3 ;CHECK-LE-NOT: smlalbt ;CHECK-BE-NOT: smlalbt ;CHECK-V6M-THUMB-NOT: smlalbt Index: test/CodeGen/ARM/select_xform.ll =================================================================== --- test/CodeGen/ARM/select_xform.ll +++ test/CodeGen/ARM/select_xform.ll @@ -4,13 +4,13 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; ARM-LABEL: t1: -; ARM: suble r1, r1, #-2147483647 ; ARM: mov r0, r1 +; ARM: suble r0, r0, #-2147483647 ; T2-LABEL: t1: -; T2: mvn r0, #-2147483648 -; T2: addle r1, r0 ; T2: mov r0, r1 +; T2: mvn r1, #-2147483648 +; T2: addle r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 %tmp3 = add i32 %tmp2, %b @@ -19,12 +19,12 @@ define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; ARM-LABEL: t2: -; ARM: suble r1, r1, #10 ; ARM: mov r0, r1 +; ARM: suble r0, r0, #10 ; T2-LABEL: t2: -; T2: suble r1, #10 ; T2: mov r0, r1 +; T2: suble r0, #10 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10 %tmp3 = sub i32 %b, %tmp2 Index: test/CodeGen/ARM/ssp-data-layout.ll =================================================================== --- test/CodeGen/ARM/ssp-data-layout.ll +++ test/CodeGen/ARM/ssp-data-layout.ll @@ -450,7 +450,7 @@ ; CHECK: strb r0, [sp, #68] ; CHECK: bl end_struct_small_char ; CHECK: bl get_struct_large_char2 -; CHECK: strb r0, [sp, #106] +; CHECK: strb r0, [sp, #110] ; CHECK: bl end_struct_large_char2 %a = alloca %struct.struct_small_char, align 1 %b = alloca %struct.struct_large_char2, align 1 Index: test/CodeGen/ARM/struct_byval_arm_t1_t2.ll =================================================================== --- test/CodeGen/ARM/struct_byval_arm_t1_t2.ll +++ test/CodeGen/ARM/struct_byval_arm_t1_t2.ll @@ -122,17 +122,17 @@ ;THUMB1-LABEL: test_A_8: ;T1POST-LABEL: test_A_8: define void @test_A_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.A, align 8 call void @use_A(%struct.A* byval align 8 %a) @@ -144,19 +144,19 @@ ;THUMB1-LABEL: test_A_16: ;T1POST-LABEL: test_A_16: define void @test_A_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! 
entry: %a = alloca %struct.A, align 16 call void @use_A(%struct.A* byval align 16 %a) @@ -239,21 +239,21 @@ ;THUMB1-LABEL: test_B_8: ;T1POST-LABEL: test_B_8: define void @test_B_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.B, align 8 call void @use_B(%struct.B* byval align 8 %a) @@ -265,21 +265,21 @@ ;THUMB1-LABEL: test_B_16: ;T1POST-LABEL: test_B_16: define void @test_B_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.B, align 16 call void @use_B(%struct.B* byval align 16 %a) @@ -363,22 +363,22 @@ ;THUMB1-LABEL: test_C_8: ;T1POST-LABEL: test_C_8: define void @test_C_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #1 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.C, align 8 call void @use_C(%struct.C* byval align 8 %a) @@ -390,22 +390,22 @@ ;THUMB1-LABEL: test_C_16: ;T1POST-LABEL: test_C_16: define void @test_C_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! 
;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #1 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.C, align 16 call void @use_C(%struct.C* byval align 16 %a) @@ -492,21 +492,21 @@ ;THUMB1-LABEL: test_D_8: ;T1POST-LABEL: test_D_8: define void @test_D_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.D, align 8 call void @use_D(%struct.D* byval align 8 %a) @@ -518,21 +518,21 @@ ;THUMB1-LABEL: test_D_16: ;T1POST-LABEL: test_D_16: define void @test_D_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.D, align 16 call void @use_D(%struct.D* byval align 16 %a) @@ -627,25 +627,25 @@ ;THUMB1-LABEL: test_E_8: ;T1POST-LABEL: test_E_8: define void @test_E_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.E, align 8 call void @use_E(%struct.E* byval align 8 %a) @@ -657,25 +657,25 @@ ;THUMB1-LABEL: test_E_16: ;T1POST-LABEL: test_E_16: define void @test_E_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! 
;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.E, align 16 call void @use_E(%struct.E* byval align 16 %a) @@ -771,18 +771,18 @@ ;THUMB1-LABEL: test_F_8: ;T1POST-LABEL: test_F_8: define void @test_F_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 @@ -790,7 +790,7 @@ ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #1 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.F, align 8 call void @use_F(%struct.F* byval align 8 %a) @@ -802,18 +802,18 @@ ;THUMB1-LABEL: test_F_16: ;T1POST-LABEL: test_F_16: define void @test_F_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 @@ -821,7 +821,7 @@ ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #1 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.F, align 16 call void @use_F(%struct.F* byval align 16 %a) @@ -896,17 +896,17 @@ ;THUMB1-LABEL: test_G_8: ;T1POST-LABEL: test_G_8: define void @test_G_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.G, align 8 call void @use_G(%struct.G* byval align 8 %a) @@ -918,17 +918,17 @@ ;THUMB1-LABEL: test_G_16: ;T1POST-LABEL: test_G_16: define void @test_G_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! 
;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.G, align 16 call void @use_G(%struct.G* byval align 16 %a) @@ -1003,17 +1003,17 @@ ;THUMB1-LABEL: test_H_8: ;T1POST-LABEL: test_H_8: define void @test_H_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.H, align 8 call void @use_H(%struct.H* byval align 8 %a) @@ -1025,17 +1025,17 @@ ;THUMB1-LABEL: test_H_16: ;T1POST-LABEL: test_H_16: define void @test_H_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.H, align 16 call void @use_H(%struct.H* byval align 16 %a) @@ -1110,17 +1110,17 @@ ;THUMB1-LABEL: test_I_8: ;T1POST-LABEL: test_I_8: define void @test_I_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.I, align 8 call void @use_I(%struct.I* byval align 8 %a) @@ -1132,17 +1132,17 @@ ;THUMB1-LABEL: test_I_16: ;T1POST-LABEL: test_I_16: define void @test_I_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.I, align 16 call void @use_I(%struct.I* byval align 16 %a) @@ -1229,21 +1229,21 @@ ;THUMB1-LABEL: test_J_8: ;T1POST-LABEL: test_J_8: define void @test_J_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! 
;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.J, align 8 call void @use_J(%struct.J* byval align 8 %a) @@ -1255,21 +1255,21 @@ ;THUMB1-LABEL: test_J_16: ;T1POST-LABEL: test_J_16: define void @test_J_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.J, align 16 call void @use_J(%struct.J* byval align 16 %a) @@ -1356,21 +1356,21 @@ ;THUMB1-LABEL: test_K_8: ;T1POST-LABEL: test_K_8: define void @test_K_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.K, align 8 call void @use_K(%struct.K* byval align 8 %a) @@ -1382,21 +1382,21 @@ ;THUMB1-LABEL: test_K_16: ;T1POST-LABEL: test_K_16: define void @test_K_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.K, align 16 call void @use_K(%struct.K* byval align 16 %a) @@ -1483,21 +1483,21 @@ ;THUMB1-LABEL: test_L_8: ;T1POST-LABEL: test_L_8: define void @test_L_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! 
entry: %a = alloca %struct.L, align 8 call void @use_L(%struct.L* byval align 8 %a) @@ -1509,21 +1509,21 @@ ;THUMB1-LABEL: test_L_16: ;T1POST-LABEL: test_L_16: define void @test_L_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.L, align 16 call void @use_L(%struct.L* byval align 16 %a) Index: test/CodeGen/ARM/swifterror.ll =================================================================== --- test/CodeGen/ARM/swifterror.ll +++ test/CodeGen/ARM/swifterror.ll @@ -39,11 +39,11 @@ ; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], r0 ; CHECK-APPLE-DAG: mov r8, #0 ; CHECK-APPLE: bl {{.*}}foo -; CHECK-APPLE: cmp r8, #0 +; CHECK-APPLE: mov r0, r8 +; CHECK-APPLE: cmp r0, #0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8] +; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8] ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov r0, r8 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller: @@ -138,7 +138,7 @@ ; CHECK-APPLE: eq ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc -; CHECK-APPLE: mov [[ID:r[0-9]+]], #1 +; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], #1 ; CHECK-APPLE-DAG: mov r8, r{{.*}} ; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8] @@ -177,14 +177,13 @@ ; CHECK-APPLE-LABEL: foo_loop: ; CHECK-APPLE: mov [[CODE:r[0-9]+]], r0 ; swifterror is kept in a register -; CHECK-APPLE: mov [[ID:r[0-9]+]], r8 ; CHECK-APPLE: cmp [[CODE]], #0 ; CHECK-APPLE: beq ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc -; CHECK-APPLE: strb r{{.*}}, [{{.*}}[[ID]], #8] +; CHECK-APPLE: mov r8, r0 +; CHECK-APPLE: strb r{{.*}}, [r8, #8] ; CHECK-APPLE: ble -; CHECK-APPLE: mov r8, [[ID]] ; CHECK-O0-LABEL: foo_loop: ; CHECK-O0: mov r{{.*}}, r8 @@ -266,11 +265,11 @@ ; CHECK-APPLE: mov [[ID:r[0-9]+]], r0 ; CHECK-APPLE: mov r8, #0 ; CHECK-APPLE: bl {{.*}}foo_sret -; CHECK-APPLE: cmp r8, #0 +; CHECK-APPLE: mov r0, r8 +; CHECK-APPLE: cmp r0, #0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8] +; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8] ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov r0, r8 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller3: @@ -314,10 +313,9 @@ ; CHECK-APPLE-LABEL: foo_vararg: ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc -; CHECK-APPLE: mov [[REG:r[0-9]+]], r0 +; CHECK-APPLE: mov r8, r0 ; CHECK-APPLE: mov [[ID:r[0-9]+]], #1 -; CHECK-APPLE-DAG: strb [[ID]], [{{.*}}[[REG]], #8] -; CHECK-APPLE-DAG: mov r8, [[REG]] +; CHECK-APPLE-DAG: strb [[ID]], [r8, #8] entry: %call = call i8* @malloc(i64 16) @@ -348,11 +346,11 @@ ; CHECK-APPLE: mov [[ID:r[0-9]+]], r0 ; CHECK-APPLE: mov r8, #0 ; CHECK-APPLE: bl {{.*}}foo_vararg -; CHECK-APPLE: cmp r8, #0 +; CHECK-APPLE: mov r0, r8 +; CHECK-APPLE: cmp r0, #0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8] +; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8] ; 
CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov r0, r8 ; CHECK-APPLE: bl {{.*}}free entry: %error_ptr_ref = alloca swifterror %swift_error* Index: test/CodeGen/BPF/alu8.ll =================================================================== --- test/CodeGen/BPF/alu8.ll +++ test/CodeGen/BPF/alu8.ll @@ -9,22 +9,22 @@ define i8 @add(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: add: -; CHECK: r1 += r2 # encoding: [0x0f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] -; CHECK: r0 = r1 # encoding: [0xbf,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 = r1 # encoding: [0xbf,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 += r2 # encoding: [0x0f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %1 = add i8 %a, %b ret i8 %1 } define i8 @and(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: and: -; CHECK: r1 &= r2 # encoding: [0x5f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 &= r2 # encoding: [0x5f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %1 = and i8 %a, %b ret i8 %1 } define i8 @bis(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: bis: -; CHECK: r1 |= r2 # encoding: [0x4f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 |= r2 # encoding: [0x4f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %1 = or i8 %a, %b ret i8 %1 } @@ -39,7 +39,7 @@ define i8 @xor(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: xor: -; CHECK: r1 ^= r2 # encoding: [0xaf,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 ^= r2 # encoding: [0xaf,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %1 = xor i8 %a, %b ret i8 %1 } Index: test/CodeGen/BPF/basictest.ll =================================================================== --- test/CodeGen/BPF/basictest.ll +++ test/CodeGen/BPF/basictest.ll @@ -4,7 +4,7 @@ %tmp.1 = add i32 %X, 1 ret i32 %tmp.1 ; CHECK-LABEL: test0: -; CHECK: r1 += 1 +; CHECK: r0 += 1 } ; CHECK-LABEL: store_imm: Index: test/CodeGen/BPF/cmp.ll =================================================================== --- test/CodeGen/BPF/cmp.ll +++ test/CodeGen/BPF/cmp.ll @@ -17,7 +17,7 @@ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ] ret i8 %.0 ; CHECK-LABEL:foo_cmp1: -; CHECK: if r2 s>= r1 +; CHECK: if r0 s>= r1 } ; Function Attrs: nounwind readnone uwtable @@ -37,7 +37,7 @@ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ] ret i8 %.0 ; CHECK-LABEL:foo_cmp2: -; CHECK: if r2 s> r1 +; CHECK: if r0 s> r1 } ; Function Attrs: nounwind readnone uwtable @@ -57,7 +57,7 @@ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ] ret i8 %.0 ; CHECK-LABEL:foo_cmp3: -; CHECK: if r1 s>= r2 +; CHECK: if r1 s>= r0 } ; Function Attrs: nounwind readnone uwtable @@ -77,7 +77,7 @@ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ] ret i8 %.0 ; CHECK-LABEL:foo_cmp4: -; CHECK: if r1 s> r2 +; CHECK: if r1 s> r0 } ; Function Attrs: nounwind readnone uwtable @@ -86,9 +86,9 @@ %a.b = select i1 %1, i8 %a, i8 %b ret i8 %a.b ; CHECK-LABEL:min: -; CHECK: if r2 s> r1 -; CHECK: r1 = r2 ; CHECK: r0 = r1 +; CHECK: if r2 s> r0 +; CHECK: r0 = r2 } ; Function Attrs: nounwind readnone uwtable @@ -97,7 +97,7 @@ %a.b = select i1 %1, i8 %a, i8 %b ret i8 %a.b ; CHECK-LABEL:minu: -; CHECK: if r3 > r1 +; CHECK: if r1 > r0 } ; Function Attrs: nounwind readnone uwtable @@ -106,7 +106,7 @@ %a.b = select i1 %1, i8 %a, i8 %b ret i8 %a.b ; CHECK-LABEL:max: -; CHECK: if r1 s> r2 +; CHECK: if r0 s> r2 } ; Function Attrs: nounwind readnone uwtable Index: test/CodeGen/BPF/dwarfdump.ll =================================================================== --- test/CodeGen/BPF/dwarfdump.ll +++ test/CodeGen/BPF/dwarfdump.ll @@ -61,4 +61,4 @@ ; CHECK: file_names[ 1] 0 0x00000000 0x00000000 testprog.c ; CHECK: 0x0000000000000000 2 -; CHECK: 0x0000000000000020 7 +; CHECK: 0x0000000000000028 7 Index: 
test/CodeGen/BPF/intrinsics.ll =================================================================== --- test/CodeGen/BPF/intrinsics.ll +++ test/CodeGen/BPF/intrinsics.ll @@ -83,15 +83,15 @@ ret i32 %conv5 ; CHECK-LABEL: bswap: ; CHECK-EL: r1 = be64 r1 # encoding: [0xdc,0x01,0x00,0x00,0x40,0x00,0x00,0x00] -; CHECK-EL: r2 = be32 r2 # encoding: [0xdc,0x02,0x00,0x00,0x20,0x00,0x00,0x00] -; CHECK-EL: r2 += r1 # encoding: [0x0f,0x12,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK-EL: r0 = be32 r0 # encoding: [0xdc,0x00,0x00,0x00,0x20,0x00,0x00,0x00] +; CHECK-EL: r0 += r1 # encoding: [0x0f,0x10,0x00,0x00,0x00,0x00,0x00,0x00] ; CHECK-EL: r3 = be16 r3 # encoding: [0xdc,0x03,0x00,0x00,0x10,0x00,0x00,0x00] -; CHECK-EL: r2 += r3 # encoding: [0x0f,0x32,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK-EL: r0 += r3 # encoding: [0x0f,0x30,0x00,0x00,0x00,0x00,0x00,0x00] ; CHECK-EB: r1 = le64 r1 # encoding: [0xd4,0x10,0x00,0x00,0x00,0x00,0x00,0x40] -; CHECK-EB: r2 = le32 r2 # encoding: [0xd4,0x20,0x00,0x00,0x00,0x00,0x00,0x20] -; CHECK-EB: r2 += r1 # encoding: [0x0f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK-EB: r0 = le32 r0 # encoding: [0xd4,0x00,0x00,0x00,0x00,0x00,0x00,0x20] +; CHECK-EB: r0 += r1 # encoding: [0x0f,0x01,0x00,0x00,0x00,0x00,0x00,0x00] ; CHECK-EB: r3 = le16 r3 # encoding: [0xd4,0x30,0x00,0x00,0x00,0x00,0x00,0x10] -; CHECK-EB: r2 += r3 # encoding: [0x0f,0x23,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK-EB: r0 += r3 # encoding: [0x0f,0x03,0x00,0x00,0x00,0x00,0x00,0x00] } declare i64 @llvm.bswap.i64(i64) #1 Index: test/CodeGen/BPF/objdump_intrinsics.ll =================================================================== --- test/CodeGen/BPF/objdump_intrinsics.ll +++ test/CodeGen/BPF/objdump_intrinsics.ll @@ -83,15 +83,15 @@ ret i32 %conv5 ; CHECK-LABEL: bswap: ; CHECK-EL: r1 = be64 r1 -; CHECK-EL: r2 = be32 r2 -; CHECK-EL: r2 += r1 +; CHECK-EL: r0 = be32 r0 +; CHECK-EL: r0 += r1 ; CHECK-EL: r3 = be16 r3 -; CHECK-EL: r2 += r3 +; CHECK-EL: r0 += r3 ; CHECK-EB: r1 = le64 r1 -; CHECK-EB: r2 = le32 r2 -; CHECK-EB: r2 += r1 +; CHECK-EB: r0 = le32 r0 +; CHECK-EB: r0 += r1 ; CHECK-EB: r3 = le16 r3 -; CHECK-EB: r2 += r3 +; CHECK-EB: r0 += r3 } declare i64 @llvm.bswap.i64(i64) #1 Index: test/CodeGen/BPF/sanity.ll =================================================================== --- test/CodeGen/BPF/sanity.ll +++ test/CodeGen/BPF/sanity.ll @@ -7,7 +7,7 @@ %1 = add nsw i32 %b, %a ret i32 %1 ; CHECK-LABEL: foo_int: -; CHECK: r2 += r1 +; CHECK: r0 += r1 } ; Function Attrs: nounwind readnone uwtable @@ -15,9 +15,9 @@ %1 = add i8 %b, %a ret i8 %1 ; CHECK-LABEL: foo_char: -; CHECK: r2 += r1 -; CHECK: r2 <<= 56 -; CHECK: r2 s>>= 56 +; CHECK: r0 += r1 +; CHECK: r0 <<= 56 +; CHECK: r0 s>>= 56 } ; Function Attrs: nounwind readnone uwtable @@ -26,9 +26,9 @@ %2 = sub i64 %1, %c ret i64 %2 ; CHECK-LABEL: foo_ll: -; CHECK: r2 += r1 -; CHECK: r2 -= r3 ; CHECK: r0 = r2 +; CHECK: r0 += r1 +; CHECK: r0 -= r3 } ; Function Attrs: nounwind uwtable @@ -60,7 +60,7 @@ %a.b = select i1 %1, i8 %a, i8 %b ret i8 %a.b ; CHECK-LABEL: foo_cmp: -; CHECK: if r2 s> r1 +; CHECK: if r2 s> r0 } ; Function Attrs: nounwind readnone uwtable @@ -82,7 +82,7 @@ %.0 = phi i32 [ %4, %2 ], [ %7, %5 ] ret i32 %.0 ; CHECK-LABEL: foo_muldiv: -; CHECK: r2 *= r3 +; CHECK: r0 *= r3 } ; Function Attrs: nounwind uwtable Index: test/CodeGen/BPF/shifts.ll =================================================================== --- test/CodeGen/BPF/shifts.ll +++ test/CodeGen/BPF/shifts.ll @@ -3,7 +3,7 @@ define zeroext i8 @lshr8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone { 
entry: ; CHECK-LABEL: lshr8: -; CHECK: r1 >>= r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 >>= r2 # encoding: [0x7f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = lshr i8 %a, %cnt ret i8 %shr } @@ -11,7 +11,7 @@ define signext i8 @ashr8(i8 signext %a, i8 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: ashr8: -; CHECK: r1 s>>= r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 s>>= r2 # encoding: [0xcf,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = ashr i8 %a, %cnt ret i8 %shr } @@ -19,7 +19,7 @@ define zeroext i8 @shl8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone { entry: ; CHECK: shl8 -; CHECK: r1 <<= r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 <<= r2 # encoding: [0x6f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shl = shl i8 %a, %cnt ret i8 %shl } @@ -27,7 +27,7 @@ define zeroext i16 @lshr16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: lshr16: -; CHECK: r1 >>= r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 >>= r2 # encoding: [0x7f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = lshr i16 %a, %cnt ret i16 %shr } @@ -35,7 +35,7 @@ define signext i16 @ashr16(i16 signext %a, i16 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: ashr16: -; CHECK: r1 s>>= r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 s>>= r2 # encoding: [0xcf,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = ashr i16 %a, %cnt ret i16 %shr } @@ -43,7 +43,7 @@ define zeroext i16 @shl16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: shl16: -; CHECK: r1 <<= r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 <<= r2 # encoding: [0x6f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shl = shl i16 %a, %cnt ret i16 %shl } @@ -51,8 +51,8 @@ define zeroext i32 @lshr32(i32 zeroext %a, i32 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: lshr32: -; CHECK: r1 >>= r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] -; CHECK: r1 <<= 32 # encoding: [0x67,0x01,0x00,0x00,0x20,0x00,0x00,0x00] +; CHECK: r0 >>= r2 # encoding: [0x7f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 <<= 32 # encoding: [0x67,0x00,0x00,0x00,0x20,0x00,0x00,0x00] %shr = lshr i32 %a, %cnt ret i32 %shr } @@ -60,7 +60,7 @@ define signext i32 @ashr32(i32 signext %a, i32 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: ashr32: -; CHECK: r1 s>>= r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 s>>= r2 # encoding: [0xcf,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = ashr i32 %a, %cnt ret i32 %shr } @@ -68,7 +68,7 @@ define zeroext i32 @shl32(i32 zeroext %a, i32 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: shl32: -; CHECK: r1 <<= r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 <<= r2 # encoding: [0x6f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shl = shl i32 %a, %cnt ret i32 %shl } @@ -76,7 +76,7 @@ define zeroext i64 @lshr64(i64 zeroext %a, i64 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: lshr64: -; CHECK: r1 >>= r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 >>= r2 # encoding: [0x7f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = lshr i64 %a, %cnt ret i64 %shr } @@ -84,7 +84,7 @@ define signext i64 @ashr64(i64 signext %a, i64 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: ashr64: -; CHECK: r1 s>>= r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 s>>= r2 # encoding: [0xcf,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = ashr i64 %a, %cnt ret i64 %shr } @@ -92,8 +92,8 @@ define zeroext i64 
@shl64(i64 zeroext %a, i64 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: shl64: -; CHECK: r1 <<= r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] ; CHECK: r0 = r1 # encoding: [0xbf,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 <<= r2 # encoding: [0x6f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] ; CHECK: exit # encoding: [0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00] %shl = shl i64 %a, %cnt ret i64 %shl Index: test/CodeGen/Hexagon/mul64-sext.ll =================================================================== --- test/CodeGen/Hexagon/mul64-sext.ll +++ test/CodeGen/Hexagon/mul64-sext.ll @@ -75,9 +75,9 @@ } ; CHECK-LABEL: mul_nac_2 -; CHECK: r0 = memw(r0+#0) -; CHECK: r5:4 -= mpy(r2,r0) ; CHECK: r1:0 = combine(r5,r4) +; CHECK: r6 = memw(r0+#0) +; CHECK: r1:0 -= mpy(r2,r6) ; CHECK: jumpr r31 define i64 @mul_nac_2(i32* %a0, i64 %a1, i64 %a2) #0 { b3: Index: test/CodeGen/Hexagon/pred-absolute-store.ll =================================================================== --- test/CodeGen/Hexagon/pred-absolute-store.ll +++ test/CodeGen/Hexagon/pred-absolute-store.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that we are able to predicate instructions with absolute ; addressing mode. -; CHECK: if ({{!?}}p{{[0-3]}}) memw(##gvar) = r{{[0-9]+}} +; CHECK: if ({{!?}}p{{[0-3]}}.new) memw(##gvar) = r{{[0-9]+}} @gvar = external global i32 define i32 @test2(i32 %a, i32 %b) nounwind { Index: test/CodeGen/Mips/Fast-ISel/sel1.ll =================================================================== --- test/CodeGen/Mips/Fast-ISel/sel1.ll +++ test/CodeGen/Mips/Fast-ISel/sel1.ll @@ -84,11 +84,11 @@ entry: ; CHECK-LABEL: sel_float2: + ; CHECK: mov.s $f0, $f14 ; CHECK-DAG: xor $[[T0:[0-9]+]], $6, $zero ; CHECK: sltu $[[T1:[0-9]+]], $zero, $[[T0]] ; CHECK-NEXT: andi $[[T2:[0-9]+]], $[[T1]], 1 - ; CHECK: movn.s $f14, $f12, $[[T2]] - ; CHECK: mov.s $f0, $f14 + ; CHECK: movn.s $f0, $f12, $[[T2]] %cond = icmp ne i32 %j, 0 %res = select i1 %cond, float %k, float %l ret float %res @@ -114,12 +114,12 @@ entry: ; CHECK-LABEL: sel_double2: + ; CHECK: mov.d $f0, $f14 ; CHECK-DAG: lw $[[SEL:[0-9]+]], 16($sp) ; CHECK-DAG: xor $[[T0:[0-9]+]], $[[SEL]], $zero ; CHECK: sltu $[[T1:[0-9]+]], $zero, $[[T0]] ; CHECK-NEXT: andi $[[T2:[0-9]+]], $[[T1]], 1 - ; CHECK: movn.d $f14, $f12, $[[T2]] - ; CHECK: mov.d $f0, $f14 + ; CHECK: movn.d $f0, $f12, $[[T2]] %cond = icmp ne i32 %j, 0 %res = select i1 %cond, double %k, double %l ret double %res Index: test/CodeGen/Mips/analyzebranch.ll =================================================================== --- test/CodeGen/Mips/analyzebranch.ll +++ test/CodeGen/Mips/analyzebranch.ll @@ -16,7 +16,7 @@ ; 32-GPR: mtc1 $zero, $[[Z:f[0-9]]] ; 32-GPR: mthc1 $zero, $[[Z:f[0-9]]] ; 64-GPR: dmtc1 $zero, $[[Z:f[0-9]]] -; GPR: cmp.lt.d $[[FGRCC:f[0-9]+]], $[[Z]], $f12 +; GPR: cmp.lt.d $[[FGRCC:f[0-9]+]], $[[Z]], $f0 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] ; GPR: bnezc $[[GPRCC]], {{\$|\.L}}BB Index: test/CodeGen/Mips/llvm-ir/select-dbl.ll =================================================================== --- test/CodeGen/Mips/llvm-ir/select-dbl.ll +++ test/CodeGen/Mips/llvm-ir/select-dbl.ll @@ -59,15 +59,15 @@ ; M3: andi $[[T0:[0-9]+]], $4, 1 ; M3: bnez $[[T0]], [[BB0:.LBB[0-9_]+]] - ; M3: nop - ; M3: mov.d $f13, $f14 + ; M3: mov.d $f0, $f13 + ; M3: mov.d $f0, $f14 ; M3: [[BB0]]: ; M3: jr $ra - ; M3: mov.d $f0, $f13 + ; M3: nop - ; CMOV-64: andi $[[T0:[0-9]+]], $4, 1 - ; CMOV-64: movn.d $f14, $f13, $[[T0]] ; CMOV-64: mov.d $f0, $f14 
+ ; CMOV-64: andi $[[T0:[0-9]+]], $4, 1 + ; CMOV-64: movn.d $f0, $f13, $[[T0]] ; SEL-64: mtc1 $4, $f0 ; SEL-64: sel.d $f0, $f14, $f13 @@ -90,16 +90,16 @@ ; M2: lw $[[T0:[0-9]+]], 16($sp) ; M2: andi $[[T1:[0-9]+]], $[[T0]], 1 ; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]] - ; M2: nop - ; M2: mov.d $f12, $f14 + ; M2: mov.d $f0, $f12 + ; M2: mov.d $f0, $f14 ; M2: $[[BB0]]: ; M2: jr $ra - ; M2: mov.d $f0, $f12 + ; M2: nop + ; CMOV-32: mov.d $f0, $f14 ; CMOV-32: lw $[[T0:[0-9]+]], 16($sp) ; CMOV-32: andi $[[T1:[0-9]+]], $[[T0]], 1 - ; CMOV-32: movn.d $f14, $f12, $[[T1]] - ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: movn.d $f0, $f12, $[[T1]] ; SEL-32: lw $[[T0:[0-9]+]], 16($sp) ; SEL-32: mtc1 $[[T0]], $f0 @@ -107,23 +107,23 @@ ; M3: andi $[[T0:[0-9]+]], $6, 1 ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] - ; M3: nop - ; M3: mov.d $f12, $f13 + ; M3: mov.d $f0, $f12 + ; M3: mov.d $f0, $f13 ; M3: [[BB0]]: ; M3: jr $ra - ; M3: mov.d $f0, $f12 + ; M3: nop - ; CMOV-64: andi $[[T0:[0-9]+]], $6, 1 - ; CMOV-64: movn.d $f13, $f12, $[[T0]] ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: andi $[[T0:[0-9]+]], $6, 1 + ; CMOV-64: movn.d $f0, $f12, $[[T0]] ; SEL-64: mtc1 $6, $f0 ; SEL-64: sel.d $f0, $f13, $f12 + ; MM32R3: mov.d $f0, $f14 ; MM32R3: lw $[[T0:[0-9]+]], 16($sp) ; MM32R3: andi16 $[[T1:[0-9]+]], $[[T0:[0-9]+]], 1 - ; MM32R3: movn.d $f14, $f12, $[[T1]] - ; MM32R3: mov.d $f0, $f14 + ; MM32R3: movn.d $f0, $f12, $[[T1]] %r = select i1 %s, double %x, double %y ret double %r @@ -133,34 +133,34 @@ entry: ; ALL-LABEL: tst_select_fcmp_olt_double: - ; M2: c.olt.d $f12, $f14 - ; M3: c.olt.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.olt.d $f0, $f14 + ; M3: c.olt.d $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 - ; CMOV-32: c.olt.d $f12, $f14 - ; CMOV-32: movt.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.olt.d $f12, $f0 + ; CMOV-32: movt.d $f0, $f12, $fcc0 ; SEL-32: cmp.lt.d $f0, $f12, $f14 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.olt.d $f12, $f13 - ; CMOV-64: movt.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.olt.d $f12, $f0 + ; CMOV-64: movt.d $f0, $f12, $fcc0 ; SEL-64: cmp.lt.d $f0, $f12, $f13 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.olt.d $f12, $f14 - ; MM32R3: movt.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.olt.d $f12, $f0 + ; MM32R3: movt.d $f0, $f12, $fcc0 %s = fcmp olt double %x, %y %r = select i1 %s, double %x, double %y @@ -171,34 +171,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_ole_double: - ; M2: c.ole.d $f12, $f14 - ; M3: c.ole.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.ole.d $f0, $f14 + ; M3: c.ole.d $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ole.d $f12, $f14 - ; CMOV-32: movt.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.ole.d $f12, $f0 + ; CMOV-32: movt.d $f0, $f12, $fcc0 ; SEL-32: cmp.le.d $f0, $f12, $f14 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.ole.d $f12, $f13 - ; CMOV-64: movt.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.ole.d $f12, $f0 + ; CMOV-64: movt.d $f0, $f12, $fcc0 ; SEL-64: cmp.le.d $f0, $f12, $f13 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.ole.d $f12, $f14 - ; MM32R3: movt.d 
$f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.ole.d $f12, $f0 + ; MM32R3: movt.d $f0, $f12, $fcc0 %s = fcmp ole double %x, %y %r = select i1 %s, double %x, double %y @@ -209,34 +210,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_ogt_double: - ; M2: c.ule.d $f12, $f14 - ; M3: c.ule.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.ule.d $f0, $f14 + ; M3: c.ule.d $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ule.d $f12, $f14 - ; CMOV-32: movf.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.ule.d $f12, $f0 + ; CMOV-32: movf.d $f0, $f12, $fcc0 ; SEL-32: cmp.lt.d $f0, $f14, $f12 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.ule.d $f12, $f13 - ; CMOV-64: movf.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.ule.d $f12, $f0 + ; CMOV-64: movf.d $f0, $f12, $fcc0 ; SEL-64: cmp.lt.d $f0, $f13, $f12 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.ule.d $f12, $f14 - ; MM32R3: movf.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.ule.d $f12, $f0 + ; MM32R3: movf.d $f0, $f12, $fcc0 %s = fcmp ogt double %x, %y %r = select i1 %s, double %x, double %y @@ -247,34 +249,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_oge_double: - ; M2: c.ult.d $f12, $f14 - ; M3: c.ult.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.ult.d $f0, $f14 + ; M3: c.ult.d $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ult.d $f12, $f14 - ; CMOV-32: movf.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.ult.d $f12, $f0 + ; CMOV-32: movf.d $f0, $f12, $fcc0 ; SEL-32: cmp.le.d $f0, $f14, $f12 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.ult.d $f12, $f13 - ; CMOV-64: movf.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.ult.d $f12, $f0 + ; CMOV-64: movf.d $f0, $f12, $fcc0 ; SEL-64: cmp.le.d $f0, $f13, $f12 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.ult.d $f12, $f14 - ; MM32R3: movf.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.ult.d $f12, $f0 + ; MM32R3: movf.d $f0, $f12, $fcc0 %s = fcmp oge double %x, %y %r = select i1 %s, double %x, double %y @@ -285,34 +288,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_oeq_double: - ; M2: c.eq.d $f12, $f14 - ; M3: c.eq.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.eq.d $f0, $f14 + ; M3: c.eq.d $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.eq.d $f12, $f14 - ; CMOV-32: movt.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.eq.d $f12, $f0 + ; CMOV-32: movt.d $f0, $f12, $fcc0 ; SEL-32: cmp.eq.d $f0, $f12, $f14 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.eq.d $f12, $f13 - ; CMOV-64: movt.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.eq.d $f12, $f0 + ; CMOV-64: movt.d $f0, $f12, $fcc0 ; SEL-64: cmp.eq.d $f0, $f12, $f13 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.eq.d $f12, $f14 - ; MM32R3: movt.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.eq.d $f12, $f0 + ; MM32R3: movt.d $f0, $f12, $fcc0 %s = fcmp oeq 
double %x, %y %r = select i1 %s, double %x, double %y @@ -323,20 +327,21 @@ entry: ; ALL-LABEL: tst_select_fcmp_one_double: - ; M2: c.ueq.d $f12, $f14 - ; M3: c.ueq.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.ueq.d $f0, $f14 + ; M3: c.ueq.d $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ueq.d $f12, $f14 - ; CMOV-32: movf.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.ueq.d $f12, $f0 + ; CMOV-32: movf.d $f0, $f12, $fcc0 ; SEL-32: cmp.ueq.d $f0, $f12, $f14 ; SEL-32: mfc1 $[[T0:[0-9]+]], $f0 @@ -344,9 +349,9 @@ ; SEL-32: mtc1 $[[T0:[0-9]+]], $f0 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.ueq.d $f12, $f13 - ; CMOV-64: movf.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.ueq.d $f12, $f0 + ; CMOV-64: movf.d $f0, $f12, $fcc0 ; SEL-64: cmp.ueq.d $f0, $f12, $f13 ; SEL-64: mfc1 $[[T0:[0-9]+]], $f0 @@ -354,9 +359,9 @@ ; SEL-64: mtc1 $[[T0:[0-9]+]], $f0 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.ueq.d $f12, $f14 - ; MM32R3: movf.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.ueq.d $f12, $f0 + ; MM32R3: movf.d $f0, $f12, $fcc0 %s = fcmp one double %x, %y %r = select i1 %s, double %x, double %y Index: test/CodeGen/Mips/llvm-ir/select-flt.ll =================================================================== --- test/CodeGen/Mips/llvm-ir/select-flt.ll +++ test/CodeGen/Mips/llvm-ir/select-flt.ll @@ -36,14 +36,14 @@ ; M2-M3: andi $[[T0:[0-9]+]], $4, 1 ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]] ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] - ; M2-M3: nop + ; M3: mov.s $f0, $f13 + ; M3: mov.s $f0, $f14 + ; M2: nop ; M2: jr $ra ; M2: mtc1 $6, $f0 - ; M3: mov.s $f13, $f14 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2: mtc1 $5, $f0 - ; M3: mov.s $f0, $f13 ; CMOV-32: mtc1 $6, $f0 ; CMOV-32: andi $[[T0:[0-9]+]], $4, 1 @@ -55,9 +55,9 @@ ; SEL-32: mtc1 $4, $f0 ; SEL-32: sel.s $f0, $[[F1]], $[[F0]] - ; CMOV-64: andi $[[T0:[0-9]+]], $4, 1 - ; CMOV-64: movn.s $f14, $f13, $[[T0]] ; CMOV-64: mov.s $f0, $f14 + ; CMOV-64: andi $[[T0:[0-9]+]], $4, 1 + ; CMOV-64: movn.s $f0, $f13, $[[T0]] ; SEL-64: mtc1 $4, $f0 ; SEL-64: sel.s $f0, $f14, $f13 @@ -79,30 +79,30 @@ ; M2-M3: andi $[[T0:[0-9]+]], $6, 1 ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]] ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] - ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: andi $[[T0:[0-9]+]], $6, 1 - ; CMOV-32: movn.s $f14, $f12, $[[T0]] ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: andi $[[T0:[0-9]+]], $6, 1 + ; CMOV-32: movn.s $f0, $f12, $[[T0]] ; SEL-32: mtc1 $6, $f0 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: andi $[[T0:[0-9]+]], $6, 1 - ; CMOV-64: movn.s $f13, $f12, $[[T0]] ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: andi $[[T0:[0-9]+]], $6, 1 + ; CMOV-64: movn.s $f0, $f12, $[[T0]] ; SEL-64: mtc1 $6, $f0 ; SEL-64: sel.s $f0, $f13, $f12 + ; MM32R3: mov.s $[[F0:f[0-9]+]], $f14 ; MM32R3: andi16 $[[T0:[0-9]+]], $6, 1 - ; MM32R3: movn.s $[[F0:f[0-9]+]], $f12, $[[T0]] - ; MM32R3: mov.s $f0, $[[F0]] + ; MM32R3: movn.s $[[F0]], $f12, $[[T0]] %r = select i1 %s, float %x, float %y ret float %r @@ -112,34 +112,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_olt_float: - ; M2: c.olt.s $f12, $f14 - ; M3: c.olt.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; 
M2: c.olt.s $f0, $f14 + ; M3: c.olt.s $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.olt.s $f12, $f14 - ; CMOV-32: movt.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.olt.s $f12, $f0 + ; CMOV-32: movt.s $f0, $f12, $fcc0 ; SEL-32: cmp.lt.s $f0, $f12, $f14 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.olt.s $f12, $f13 - ; CMOV-64: movt.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.olt.s $f12, $f0 + ; CMOV-64: movt.s $f0, $f12, $fcc0 ; SEL-64: cmp.lt.s $f0, $f12, $f13 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.olt.s $f12, $f14 - ; MM32R3: movt.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.olt.s $f12, $f0 + ; MM32R3: movt.s $f0, $f12, $fcc0 %s = fcmp olt float %x, %y %r = select i1 %s, float %x, float %y @@ -150,34 +151,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_ole_float: - ; M2: c.ole.s $f12, $f14 - ; M3: c.ole.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: c.ole.s $f0, $f14 + ; M3: c.ole.s $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ole.s $f12, $f14 - ; CMOV-32: movt.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.ole.s $f12, $f0 + ; CMOV-32: movt.s $f0, $f12, $fcc0 ; SEL-32: cmp.le.s $f0, $f12, $f14 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.ole.s $f12, $f13 - ; CMOV-64: movt.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.ole.s $f12, $f0 + ; CMOV-64: movt.s $f0, $f12, $fcc0 ; SEL-64: cmp.le.s $f0, $f12, $f13 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.ole.s $f12, $f14 - ; MM32R3: movt.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.ole.s $f12, $f0 + ; MM32R3: movt.s $f0, $f12, $fcc0 %s = fcmp ole float %x, %y %r = select i1 %s, float %x, float %y @@ -188,34 +190,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_ogt_float: - ; M2: c.ule.s $f12, $f14 - ; M3: c.ule.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: c.ule.s $f0, $f14 + ; M3: c.ule.s $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ule.s $f12, $f14 - ; CMOV-32: movf.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.ule.s $f12, $f0 + ; CMOV-32: movf.s $f0, $f12, $fcc0 ; SEL-32: cmp.lt.s $f0, $f14, $f12 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.ule.s $f12, $f13 - ; CMOV-64: movf.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.ule.s $f12, $f0 + ; CMOV-64: movf.s $f0, $f12, $fcc0 ; SEL-64: cmp.lt.s $f0, $f13, $f12 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.ule.s $f12, $f14 - ; MM32R3: movf.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.ule.s $f12, $f0 + ; MM32R3: movf.s $f0, $f12, $fcc0 %s = fcmp ogt float %x, %y %r = select i1 %s, float %x, float %y @@ -226,34 +229,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_oge_float: - ; M2: c.ult.s $f12, $f14 - ; M3: c.ult.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: c.ult.s $f0, $f14 + ; M3: c.ult.s $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: 
mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ult.s $f12, $f14 - ; CMOV-32: movf.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.ult.s $f12, $f0 + ; CMOV-32: movf.s $f0, $f12, $fcc0 ; SEL-32: cmp.le.s $f0, $f14, $f12 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.ult.s $f12, $f13 - ; CMOV-64: movf.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.ult.s $f12, $f0 + ; CMOV-64: movf.s $f0, $f12, $fcc0 ; SEL-64: cmp.le.s $f0, $f13, $f12 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.ult.s $f12, $f14 - ; MM32R3: movf.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.ult.s $f12, $f0 + ; MM32R3: movf.s $f0, $f12, $fcc0 %s = fcmp oge float %x, %y %r = select i1 %s, float %x, float %y @@ -264,34 +268,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_oeq_float: - ; M2: c.eq.s $f12, $f14 - ; M3: c.eq.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: c.eq.s $f0, $f14 + ; M3: c.eq.s $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.eq.s $f12, $f14 - ; CMOV-32: movt.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.eq.s $f12, $f0 + ; CMOV-32: movt.s $f0, $f12, $fcc0 ; SEL-32: cmp.eq.s $f0, $f12, $f14 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.eq.s $f12, $f13 - ; CMOV-64: movt.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.eq.s $f12, $f0 + ; CMOV-64: movt.s $f0, $f12, $fcc0 ; SEL-64: cmp.eq.s $f0, $f12, $f13 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.eq.s $f12, $f14 - ; MM32R3: movt.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.eq.s $f12, $f0 + ; MM32R3: movt.s $f0, $f12, $fcc0 %s = fcmp oeq float %x, %y %r = select i1 %s, float %x, float %y @@ -302,20 +307,21 @@ entry: ; ALL-LABEL: tst_select_fcmp_one_float: - ; M2: c.ueq.s $f12, $f14 - ; M3: c.ueq.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: c.ueq.s $f0, $f14 + ; M3: c.ueq.s $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ueq.s $f12, $f14 - ; CMOV-32: movf.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.ueq.s $f12, $f0 + ; CMOV-32: movf.s $f0, $f12, $fcc0 ; SEL-32: cmp.ueq.s $f0, $f12, $f14 ; SEL-32: mfc1 $[[T0:[0-9]+]], $f0 @@ -323,9 +329,9 @@ ; SEL-32: mtc1 $[[T0:[0-9]+]], $f0 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.ueq.s $f12, $f13 - ; CMOV-64: movf.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.ueq.s $f12, $f0 + ; CMOV-64: movf.s $f0, $f12, $fcc0 ; SEL-64: cmp.ueq.s $f0, $f12, $f13 ; SEL-64: mfc1 $[[T0:[0-9]+]], $f0 @@ -333,9 +339,9 @@ ; SEL-64: mtc1 $[[T0:[0-9]+]], $f0 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.ueq.s $f12, $f14 - ; MM32R3: movf.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.ueq.s $f12, $f0 + ; MM32R3: movf.s $f0, $f12, $fcc0 %s = fcmp one float %x, %y %r = select i1 %s, float %x, float %y Index: test/CodeGen/Mips/o32_cc_byval.ll =================================================================== --- test/CodeGen/Mips/o32_cc_byval.ll +++ test/CodeGen/Mips/o32_cc_byval.ll @@ -97,14 +97,14 @@ define void @f4(float %f, %struct.S3* 
nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind { entry: ; CHECK: addiu $sp, $sp, -48 -; CHECK-DAG: sw $7, 60($sp) +; CHECK: move $4, $7 +; CHECK-DAG: sw $4, 60($sp) ; CHECK-DAG: sw $6, 56($sp) ; CHECK-DAG: sw $5, 52($sp) ; CHECK-DAG: lw $[[R1:[0-9]+]], 80($sp) ; CHECK-DAG: lb $[[R0:[0-9]+]], 52($sp) ; CHECK-DAG: sw $[[R0]], 32($sp) ; CHECK-DAG: sw $[[R1]], 24($sp) -; CHECK: move $4, $7 %i = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 2 %tmp = load i32, i32* %i, align 4 Index: test/CodeGen/Mips/select.ll =================================================================== --- test/CodeGen/Mips/select.ll +++ test/CodeGen/Mips/select.ll @@ -147,11 +147,11 @@ ; 32R6: mtc1 $[[T0]], $[[CC:f0]] ; 32R6: sel.s $[[CC]], $[[F1]], $[[F0]] -; 64: movn.s $f14, $f13, $4 ; 64: mov.s $f0, $f14 +; 64: movn.s $f0, $f13, $4 -; 64R2: movn.s $f14, $f13, $4 ; 64R2: mov.s $f0, $f14 +; 64R2: movn.s $f0, $f13, $4 ; 64R6: sltu $[[T0:[0-9]+]], $zero, $4 ; 64R6: mtc1 $[[T0]], $[[CC:f0]] @@ -183,11 +183,11 @@ ; 32R6-DAG: ldc1 $[[F1:f[0-9]+]], 16($sp) ; 32R6: sel.d $[[CC]], $[[F1]], $[[F0]] -; 64: movn.d $f14, $f13, $4 ; 64: mov.d $f0, $f14 +; 64: movn.d $f0, $f13, $4 -; 64R2: movn.d $f14, $f13, $4 ; 64R2: mov.d $f0, $f14 +; 64R2: movn.d $f0, $f13, $4 ; 64R6-DAG: sltu $[[T0:[0-9]+]], $zero, $4 ; 64R6-DAG: mtc1 $[[T0]], $[[CC:f0]] @@ -202,30 +202,30 @@ entry: ; ALL-LABEL: f32_fcmp_oeq_f32_val: +; 32: mov.s $f0, $f14 ; 32-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32: c.eq.s $[[F2]], $[[F3]] -; 32: movt.s $f14, $f12, $fcc0 -; 32: mov.s $f0, $f14 +; 32: movt.s $f0, $f12, $fcc0 +; 32R2: mov.s $f0, $f14 ; 32R2-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R2-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R2: c.eq.s $[[F2]], $[[F3]] -; 32R2: movt.s $f14, $f12, $fcc0 -; 32R2: mov.s $f0, $f14 +; 32R2: movt.s $f0, $f12, $fcc0 ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R6: cmp.eq.s $[[CC:f0]], $[[F2]], $[[F3]] ; 32R6: sel.s $[[CC]], $f14, $f12 -; 64: c.eq.s $f14, $f15 -; 64: movt.s $f13, $f12, $fcc0 ; 64: mov.s $f0, $f13 +; 64: c.eq.s $f14, $f15 +; 64: movt.s $f0, $f12, $fcc0 -; 64R2: c.eq.s $f14, $f15 -; 64R2: movt.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 +; 64R2: c.eq.s $f14, $f15 +; 64R2: movt.s $f0, $f12, $fcc0 ; 64R6: cmp.eq.s $[[CC:f0]], $f14, $f15 ; 64R6: sel.s $[[CC]], $f13, $f12 @@ -239,30 +239,30 @@ entry: ; ALL-LABEL: f32_fcmp_olt_f32_val: +; 32: mov.s $f0, $f14 ; 32-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32: c.olt.s $[[F2]], $[[F3]] -; 32: movt.s $f14, $f12, $fcc0 -; 32: mov.s $f0, $f14 +; 32: movt.s $f0, $f12, $fcc0 +; 32R2: mov.s $f0, $f14 ; 32R2-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R2-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R2: c.olt.s $[[F2]], $[[F3]] -; 32R2: movt.s $f14, $f12, $fcc0 -; 32R2: mov.s $f0, $f14 +; 32R2: movt.s $f0, $f12, $fcc0 ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R6: cmp.lt.s $[[CC:f0]], $[[F2]], $[[F3]] ; 32R6: sel.s $[[CC]], $f14, $f12 -; 64: c.olt.s $f14, $f15 -; 64: movt.s $f13, $f12, $fcc0 ; 64: mov.s $f0, $f13 +; 64: c.olt.s $f14, $f15 +; 64: movt.s $f0, $f12, $fcc0 -; 64R2: c.olt.s $f14, $f15 -; 64R2: movt.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 +; 64R2: c.olt.s $f14, $f15 +; 64R2: movt.s $f0, $f12, $fcc0 ; 64R6: cmp.lt.s $[[CC:f0]], $f14, $f15 ; 64R6: sel.s $[[CC]], $f13, $f12 @@ -276,30 +276,30 @@ entry: ; ALL-LABEL: f32_fcmp_ogt_f32_val: +; 32: mov.s $f0, $f14 ; 32-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32: c.ule.s $[[F2]], 
$[[F3]] -; 32: movf.s $f14, $f12, $fcc0 -; 32: mov.s $f0, $f14 +; 32: movf.s $f0, $f12, $fcc0 +; 32R2: mov.s $f0, $f14 ; 32R2-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R2-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R2: c.ule.s $[[F2]], $[[F3]] -; 32R2: movf.s $f14, $f12, $fcc0 -; 32R2: mov.s $f0, $f14 +; 32R2: movf.s $f0, $f12, $fcc0 ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R6: cmp.lt.s $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.s $[[CC]], $f14, $f12 -; 64: c.ule.s $f14, $f15 -; 64: movf.s $f13, $f12, $fcc0 ; 64: mov.s $f0, $f13 +; 64: c.ule.s $f14, $f15 +; 64: movf.s $f0, $f12, $fcc0 -; 64R2: c.ule.s $f14, $f15 -; 64R2: movf.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 +; 64R2: c.ule.s $f14, $f15 +; 64R2: movf.s $f0, $f12, $fcc0 ; 64R6: cmp.lt.s $[[CC:f0]], $f15, $f14 ; 64R6: sel.s $[[CC]], $f13, $f12 @@ -313,30 +313,30 @@ entry: ; ALL-LABEL: f32_fcmp_ogt_f64_val: +; 32: mov.d $f0, $f14 ; 32-DAG: lwc1 $[[F2:f[0-9]+]], 16($sp) ; 32-DAG: lwc1 $[[F3:f[0-9]+]], 20($sp) ; 32: c.ule.s $[[F2]], $[[F3]] -; 32: movf.d $f14, $f12, $fcc0 -; 32: mov.d $f0, $f14 +; 32: movf.d $f0, $f12, $fcc0 +; 32R2: mov.d $f0, $f14 ; 32R2-DAG: lwc1 $[[F2:f[0-9]+]], 16($sp) ; 32R2-DAG: lwc1 $[[F3:f[0-9]+]], 20($sp) ; 32R2: c.ule.s $[[F2]], $[[F3]] -; 32R2: movf.d $f14, $f12, $fcc0 -; 32R2: mov.d $f0, $f14 +; 32R2: movf.d $f0, $f12, $fcc0 ; 32R6-DAG: lwc1 $[[F2:f[0-9]+]], 16($sp) ; 32R6-DAG: lwc1 $[[F3:f[0-9]+]], 20($sp) ; 32R6: cmp.lt.s $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.d $[[CC]], $f14, $f12 -; 64: c.ule.s $f14, $f15 -; 64: movf.d $f13, $f12, $fcc0 ; 64: mov.d $f0, $f13 +; 64: c.ule.s $f14, $f15 +; 64: movf.d $f0, $f12, $fcc0 -; 64R2: c.ule.s $f14, $f15 -; 64R2: movf.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 +; 64R2: c.ule.s $f14, $f15 +; 64R2: movf.d $f0, $f12, $fcc0 ; 64R6: cmp.lt.s $[[CC:f0]], $f15, $f14 ; 64R6: sel.d $[[CC]], $f13, $f12 @@ -350,30 +350,30 @@ entry: ; ALL-LABEL: f64_fcmp_oeq_f64_val: +; 32: mov.d $f0, $f14 ; 32-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32: c.eq.d $[[F2]], $[[F3]] -; 32: movt.d $f14, $f12, $fcc0 -; 32: mov.d $f0, $f14 +; 32: movt.d $f0, $f12, $fcc0 +; 32R2: mov.d $f0, $f14 ; 32R2-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R2-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R2: c.eq.d $[[F2]], $[[F3]] -; 32R2: movt.d $f14, $f12, $fcc0 -; 32R2: mov.d $f0, $f14 +; 32R2: movt.d $f0, $f12, $fcc0 ; 32R6-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R6-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R6: cmp.eq.d $[[CC:f0]], $[[F2]], $[[F3]] ; 32R6: sel.d $[[CC]], $f14, $f12 -; 64: c.eq.d $f14, $f15 -; 64: movt.d $f13, $f12, $fcc0 ; 64: mov.d $f0, $f13 +; 64: c.eq.d $f14, $f15 +; 64: movt.d $f0, $f12, $fcc0 -; 64R2: c.eq.d $f14, $f15 -; 64R2: movt.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 +; 64R2: c.eq.d $f14, $f15 +; 64R2: movt.d $f0, $f12, $fcc0 ; 64R6: cmp.eq.d $[[CC:f0]], $f14, $f15 ; 64R6: sel.d $[[CC]], $f13, $f12 @@ -387,30 +387,30 @@ entry: ; ALL-LABEL: f64_fcmp_olt_f64_val: +; 32: mov.d $f0, $f14 ; 32-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32: c.olt.d $[[F2]], $[[F3]] -; 32: movt.d $f14, $f12, $fcc0 -; 32: mov.d $f0, $f14 +; 32: movt.d $f0, $f12, $fcc0 +; 32R2: mov.d $f0, $f14 ; 32R2-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R2-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R2: c.olt.d $[[F2]], $[[F3]] -; 32R2: movt.d $f14, $f12, $fcc0 -; 32R2: mov.d $f0, $f14 +; 32R2: movt.d $f0, $f12, $fcc0 ; 32R6-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R6-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R6: cmp.lt.d $[[CC:f0]], $[[F2]], $[[F3]] ; 
32R6: sel.d $[[CC]], $f14, $f12 -; 64: c.olt.d $f14, $f15 -; 64: movt.d $f13, $f12, $fcc0 ; 64: mov.d $f0, $f13 +; 64: c.olt.d $f14, $f15 +; 64: movt.d $f0, $f12, $fcc0 -; 64R2: c.olt.d $f14, $f15 -; 64R2: movt.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 +; 64R2: c.olt.d $f14, $f15 +; 64R2: movt.d $f0, $f12, $fcc0 ; 64R6: cmp.lt.d $[[CC:f0]], $f14, $f15 ; 64R6: sel.d $[[CC]], $f13, $f12 @@ -424,30 +424,30 @@ entry: ; ALL-LABEL: f64_fcmp_ogt_f64_val: +; 32: mov.d $f0, $f14 ; 32-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32: c.ule.d $[[F2]], $[[F3]] -; 32: movf.d $f14, $f12, $fcc0 -; 32: mov.d $f0, $f14 +; 32: movf.d $f0, $f12, $fcc0 +; 32R2: mov.d $f0, $f14 ; 32R2-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R2-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R2: c.ule.d $[[F2]], $[[F3]] -; 32R2: movf.d $f14, $f12, $fcc0 -; 32R2: mov.d $f0, $f14 +; 32R2: movf.d $f0, $f12, $fcc0 ; 32R6-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R6-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R6: cmp.lt.d $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.d $[[CC]], $f14, $f12 -; 64: c.ule.d $f14, $f15 -; 64: movf.d $f13, $f12, $fcc0 ; 64: mov.d $f0, $f13 +; 64: c.ule.d $f14, $f15 +; 64: movf.d $f0, $f12, $fcc0 -; 64R2: c.ule.d $f14, $f15 -; 64R2: movf.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 +; 64R2: c.ule.d $f14, $f15 +; 64R2: movf.d $f0, $f12, $fcc0 ; 64R6: cmp.lt.d $[[CC:f0]], $f15, $f14 ; 64R6: sel.d $[[CC]], $f13, $f12 @@ -461,19 +461,19 @@ entry: ; ALL-LABEL: f64_fcmp_ogt_f32_val: +; 32: mov.s $f0, $f14 ; 32-DAG: mtc1 $6, $[[F2:f[1-3]*[02468]+]] ; 32-DAG: mtc1 $7, $[[F2H:f[1-3]*[13579]+]] ; 32-DAG: ldc1 $[[F3:f[0-9]+]], 16($sp) ; 32: c.ule.d $[[F2]], $[[F3]] -; 32: movf.s $f14, $f12, $fcc0 -; 32: mov.s $f0, $f14 +; 32: movf.s $f0, $f12, $fcc0 +; 32R2: mov.s $f0, $f14 ; 32R2-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R2-DAG: mthc1 $7, $[[F2]] ; 32R2-DAG: ldc1 $[[F3:f[0-9]+]], 16($sp) ; 32R2: c.ule.d $[[F2]], $[[F3]] -; 32R2: movf.s $f14, $f12, $fcc0 -; 32R2: mov.s $f0, $f14 +; 32R2: movf.s $f0, $f12, $fcc0 ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mthc1 $7, $[[F2]] @@ -481,13 +481,13 @@ ; 32R6: cmp.lt.d $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.s $[[CC]], $f14, $f12 -; 64: c.ule.d $f14, $f15 -; 64: movf.s $f13, $f12, $fcc0 ; 64: mov.s $f0, $f13 +; 64: c.ule.d $f14, $f15 +; 64: movf.s $f0, $f12, $fcc0 -; 64R2: c.ule.d $f14, $f15 -; 64R2: movf.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 +; 64R2: c.ule.d $f14, $f15 +; 64R2: movf.s $f0, $f12, $fcc0 ; 64R6: cmp.lt.d $[[CC:f0]], $f15, $f14 ; 64R6: sel.s $[[CC]], $f13, $f12 Index: test/CodeGen/PowerPC/licm-tocReg.ll =================================================================== --- test/CodeGen/PowerPC/licm-tocReg.ll +++ test/CodeGen/PowerPC/licm-tocReg.ll @@ -68,19 +68,19 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: addis 4, 2, .LC0@toc@ha ; CHECK-NEXT: addis 5, 2, .LC1@toc@ha +; CHECK-NEXT: mr 12, 3 ; CHECK-NEXT: ld 4, .LC0@toc@l(4) ; CHECK-NEXT: ld 5, .LC1@toc@l(5) ; CHECK-NEXT: lwz 6, 0(4) ; CHECK-NEXT: lwz 5, 0(5) +; CHECK-NEXT: lwz 4, 0(4) ; CHECK-NEXT: cmpw 6, 5 -; CHECK-NEXT: lwz 5, 0(4) -; CHECK-NEXT: mr 4, 3 ; CHECK-NEXT: bgt 0, .LBB0_3 ; CHECK-NEXT: # BB#1: ; CHECK-NEXT: addis 3, 2, .LC0@toc@ha -; CHECK-NEXT: addis 6, 2, .LC1@toc@ha +; CHECK-NEXT: addis 5, 2, .LC1@toc@ha ; CHECK-NEXT: ld 3, .LC0@toc@l(3) -; CHECK-NEXT: ld 6, .LC1@toc@l(6) +; CHECK-NEXT: ld 5, .LC1@toc@l(5) ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_2: # %if.end ; CHECK-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha Index: test/CodeGen/PowerPC/load-two-flts.ll 
=================================================================== --- test/CodeGen/PowerPC/load-two-flts.ll +++ test/CodeGen/PowerPC/load-two-flts.ll @@ -53,8 +53,8 @@ ; CHECK-NOT: ldu {{[0-9]+}}, 8(5) ; CHECK-NOT: stw ; CHECK-NOT: rldicl -; CHECK-DAG: lfsu {{[0-9]+}}, 8(5) -; CHECK-DAG: lfs {{[0-9]+}}, 4(5) +; CHECK-DAG: lfsu {{[0-9]+}}, 8(3) +; CHECK-DAG: lfs {{[0-9]+}}, 4(3) ; CHECK: blr } Index: test/CodeGen/PowerPC/ppc64-byval-align.ll =================================================================== --- test/CodeGen/PowerPC/ppc64-byval-align.ll +++ test/CodeGen/PowerPC/ppc64-byval-align.ll @@ -24,8 +24,7 @@ ret void } ; CHECK-LABEL: @caller1 -; CHECK: mr [[REG:[0-9]+]], 3 -; CHECK: mr 7, [[REG]] +; CHECK: mr 7, 3 ; CHECK: bl test1 define i64 @callee2(%struct.pad* byval nocapture readnone %x, i32 signext %y, %struct.test* byval align 16 nocapture readonly %z) { Index: test/CodeGen/PowerPC/select-i1-vs-i1.ll =================================================================== --- test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -477,11 +477,10 @@ ; CHECK-LABEL: @testfloatslt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -496,11 +495,10 @@ ; CHECK-LABEL: @testfloatult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -515,11 +513,10 @@ ; CHECK-LABEL: @testfloatsle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -534,11 +531,10 @@ ; CHECK-LABEL: @testfloatule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -553,11 +549,10 @@ ; CHECK-LABEL: @testfloateq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -572,11 +567,10 @@ ; CHECK-LABEL: @testfloatsge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -591,11 +585,10 @@ ; 
CHECK-LABEL: @testfloatuge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -610,11 +603,10 @@ ; CHECK-LABEL: @testfloatsgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -629,11 +621,10 @@ ; CHECK-LABEL: @testfloatugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -648,11 +639,10 @@ ; CHECK-LABEL: @testfloatne ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -667,11 +657,10 @@ ; CHECK-LABEL: @testdoubleslt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -686,11 +675,10 @@ ; CHECK-LABEL: @testdoubleult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -705,11 +693,10 @@ ; CHECK-LABEL: @testdoublesle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -724,11 +711,10 @@ ; CHECK-LABEL: @testdoubleule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -743,11 +729,10 @@ ; CHECK-LABEL: @testdoubleeq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: creqv [[REG1:[0-9]+]], 
{{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -762,11 +747,10 @@ ; CHECK-LABEL: @testdoublesge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -781,11 +765,10 @@ ; CHECK-LABEL: @testdoubleuge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -800,11 +783,10 @@ ; CHECK-LABEL: @testdoublesgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -819,11 +801,10 @@ ; CHECK-LABEL: @testdoubleugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -838,11 +819,10 @@ ; CHECK-LABEL: @testdoublene ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -1231,12 +1211,11 @@ ; CHECK-LABEL: @testqv4doubleslt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1250,12 +1229,11 @@ ; CHECK-LABEL: @testqv4doubleult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1269,12 +1247,11 @@ ; CHECK-LABEL: @testqv4doublesle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1288,12 +1265,11 @@ ; CHECK-LABEL: @testqv4doubleule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 
6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1307,12 +1283,11 @@ ; CHECK-LABEL: @testqv4doubleeq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1326,12 +1301,11 @@ ; CHECK-LABEL: @testqv4doublesge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1345,12 +1319,11 @@ ; CHECK-LABEL: @testqv4doubleuge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1364,12 +1337,11 @@ ; CHECK-LABEL: @testqv4doublesgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1383,12 +1355,11 @@ ; CHECK-LABEL: @testqv4doubleugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1402,12 +1373,11 @@ ; CHECK-LABEL: @testqv4doublene ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1421,12 +1391,11 @@ ; CHECK-LABEL: @testqv4floatslt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1440,12 +1409,11 @@ ; CHECK-LABEL: @testqv4floatult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1459,12 +1427,11 @@ ; CHECK-LABEL: @testqv4floatsle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; 
CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1478,12 +1445,11 @@ ; CHECK-LABEL: @testqv4floatule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1497,12 +1463,11 @@ ; CHECK-LABEL: @testqv4floateq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1516,12 +1481,11 @@ ; CHECK-LABEL: @testqv4floatsge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1535,12 +1499,11 @@ ; CHECK-LABEL: @testqv4floatuge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1554,12 +1517,11 @@ ; CHECK-LABEL: @testqv4floatsgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1573,12 +1535,11 @@ ; CHECK-LABEL: @testqv4floatugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1592,12 +1553,11 @@ ; CHECK-LABEL: @testqv4floatne ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1611,12 +1571,11 @@ ; CHECK-LABEL: @testqv4i1slt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1630,12 +1589,11 @@ ; CHECK-LABEL: @testqv4i1ult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1649,12 +1607,11 @@ ; CHECK-LABEL: @testqv4i1sle ; 
CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1668,12 +1625,11 @@ ; CHECK-LABEL: @testqv4i1ule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1687,12 +1643,11 @@ ; CHECK-LABEL: @testqv4i1eq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1706,12 +1661,11 @@ ; CHECK-LABEL: @testqv4i1sge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1725,12 +1679,11 @@ ; CHECK-LABEL: @testqv4i1uge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1744,12 +1697,11 @@ ; CHECK-LABEL: @testqv4i1sgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1763,12 +1715,11 @@ ; CHECK-LABEL: @testqv4i1ugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1782,12 +1733,11 @@ ; CHECK-LABEL: @testqv4i1ne ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } Index: test/CodeGen/SPARC/32abi.ll =================================================================== --- test/CodeGen/SPARC/32abi.ll +++ test/CodeGen/SPARC/32abi.ll @@ -88,36 +88,28 @@ ; SOFT-NEXT: mov %i2, %o0 ; SOFT-NEXT: call __extendsfdf2 ; SOFT-NEXT: nop -; SOFT-NEXT: mov %o0, %i2 -; SOFT-NEXT: mov %o1, %g2 +; SOFT-NEXT: mov %o0, %o2 +; SOFT-NEXT: mov %o1, %o3 ; SOFT-NEXT: mov %i0, %o0 ; SOFT-NEXT: mov %i1, %o1 -; SOFT-NEXT: mov %i2, %o2 -; SOFT-NEXT: mov %g2, %o3 ; SOFT-NEXT: call __adddf3 ; SOFT-NEXT: nop -; SOFT-NEXT: mov %o0, %i0 -; 
SOFT-NEXT: mov %o1, %i1 +; SOFT-NEXT: mov %o0, %o2 +; SOFT-NEXT: mov %o1, %o3 ; SOFT-NEXT: mov %i3, %o0 ; SOFT-NEXT: mov %i4, %o1 -; SOFT-NEXT: mov %i0, %o2 -; SOFT-NEXT: mov %i1, %o3 ; SOFT-NEXT: call __adddf3 ; SOFT-NEXT: nop -; SOFT-NEXT: mov %o0, %i0 -; SOFT-NEXT: mov %o1, %i1 +; SOFT-NEXT: mov %o0, %o2 +; SOFT-NEXT: mov %o1, %o3 ; SOFT-NEXT: mov %i5, %o0 ; SOFT-NEXT: mov %l3, %o1 -; SOFT-NEXT: mov %i0, %o2 -; SOFT-NEXT: mov %i1, %o3 ; SOFT-NEXT: call __adddf3 ; SOFT-NEXT: nop -; SOFT-NEXT: mov %o0, %i0 -; SOFT-NEXT: mov %o1, %i1 +; SOFT-NEXT: mov %o0, %o2 +; SOFT-NEXT: mov %o1, %o3 ; SOFT-NEXT: mov %l1, %o0 ; SOFT-NEXT: mov %l2, %o1 -; SOFT-NEXT: mov %i0, %o2 -; SOFT-NEXT: mov %i1, %o3 ; SOFT-NEXT: call __adddf3 ; SOFT-NEXT: nop ; SOFT-NEXT: mov %o0, %i0 Index: test/CodeGen/SPARC/64abi.ll =================================================================== --- test/CodeGen/SPARC/64abi.ll +++ test/CodeGen/SPARC/64abi.ll @@ -65,7 +65,7 @@ ; SOFT: save %sp, -176, %sp ; SOFT: srl %i0, 0, %o0 ; SOFT-NEXT: call __extendsfdf2 -; SOFT: mov %o0, %i0 +; SOFT: mov %o0, %o1 ; SOFT: mov %i1, %o0 ; SOFT: mov %i2, %o0 ; SOFT: mov %i3, %o0 @@ -145,13 +145,11 @@ ; HARD: fstod %f3 ; HARD: faddd %f6 ; HARD: faddd %f16 -; SOFT: mov %o0, %i1 +; SOFT: mov %o0, %o1 ; SOFT-NEXT: mov %i3, %o0 -; SOFT-NEXT: mov %i1, %o1 ; SOFT-NEXT: call __adddf3 -; SOFT: mov %o0, %i1 +; SOFT: mov %o0, %o1 ; SOFT-NEXT: mov %i0, %o0 -; SOFT-NEXT: mov %i1, %o1 ; SOFT-NEXT: call __adddf3 ; HARD: std %f0, [%i1] ; SOFT: stx %o0, [%i5] @@ -217,8 +215,8 @@ ; CHECK-LABEL: call_inreg_fi: ; Allocate space for 6 arguments, even when only 2 are used. ; CHECK: save %sp, -176, %sp -; HARD: sllx %i1, 32, %o0 -; HARD: fmovs %f5, %f1 +; HARD-DAG: sllx %i1, 32, %o0 +; HARD-DAG: fmovs %f5, %f1 ; SOFT: srl %i2, 0, %i0 ; SOFT: sllx %i1, 32, %i1 ; SOFT: or %i1, %i0, %o0 @@ -240,8 +238,8 @@ } ; CHECK-LABEL: call_inreg_ff: -; HARD: fmovs %f3, %f0 -; HARD: fmovs %f5, %f1 +; HARD-DAG: fmovs %f3, %f0 +; HARD-DAG: fmovs %f5, %f1 ; SOFT: srl %i2, 0, %i0 ; SOFT: sllx %i1, 32, %i1 ; SOFT: or %i1, %i0, %o0 @@ -527,9 +525,8 @@ ; CHECK: call sinf ; HARD: ld [%fp+[[Offset1]]], %f1 ; HARD: fmuls %f1, %f0, %f0 -; SOFT: mov %o0, %i0 +; SOFT: mov %o0, %o1 ; SOFT: mov %i1, %o0 -; SOFT: mov %i0, %o1 ; SOFT: call __mulsf3 ; SOFT: sllx %o0, 32, %i0 Index: test/CodeGen/SPARC/64cond.ll =================================================================== --- test/CodeGen/SPARC/64cond.ll +++ test/CodeGen/SPARC/64cond.ll @@ -67,9 +67,10 @@ } ; CHECK: selecti64_fcc +; CHECK: mov %i3, %i0 ; CHECK: fcmps %f1, %f3 -; CHECK: movul %fcc0, %i2, %i3 -; CHECK: restore %g0, %i3, %o0 +; CHECK: movul %fcc0, %i2, %i0 +; CHECK: restore define i64 @selecti64_fcc(float %x, float %y, i64 %a, i64 %b) { entry: %tobool = fcmp ult float %x, %y @@ -78,9 +79,9 @@ } ; CHECK: selectf32_xcc -; CHECK: cmp %i0, %i1 -; CHECK: fmovsg %xcc, %f5, %f7 ; CHECK: fmovs %f7, %f0 +; CHECK: cmp %i0, %i1 +; CHECK: fmovsg %xcc, %f5, %f0 define float @selectf32_xcc(i64 %x, i64 %y, float %a, float %b) { entry: %tobool = icmp sgt i64 %x, %y @@ -89,9 +90,9 @@ } ; CHECK: selectf64_xcc -; CHECK: cmp %i0, %i1 -; CHECK: fmovdg %xcc, %f4, %f6 ; CHECK: fmovd %f6, %f0 +; CHECK: cmp %i0, %i1 +; CHECK: fmovdg %xcc, %f4, %f0 define double @selectf64_xcc(i64 %x, i64 %y, double %a, double %b) { entry: %tobool = icmp sgt i64 %x, %y Index: test/CodeGen/SystemZ/call-03.ll =================================================================== --- test/CodeGen/SystemZ/call-03.ll +++ test/CodeGen/SystemZ/call-03.ll @@ -62,16 +62,13 @@ ; 
Check an indirect call. In this case the only acceptable choice for ; the target register is %r1. -; -; NOTE: the extra copy 'lgr %r1, %r0' is a coalescing failure. define void @f5(void(i32, i32, i32, i32) *%foo) { ; CHECK-LABEL: f5: -; CHECK: lgr %r0, %r2 +; CHECK: lgr %r1, %r2 ; CHECK-DAG: lhi %r2, 1 ; CHECK-DAG: lhi %r3, 2 ; CHECK-DAG: lhi %r4, 3 ; CHECK-DAG: lhi %r5, 4 -; CHECK: lgr %r1, %r0 ; CHECK: br %r1 tail call void %foo(i32 1, i32 2, i32 3, i32 4) ret void Index: test/CodeGen/SystemZ/swift-return.ll =================================================================== --- test/CodeGen/SystemZ/swift-return.ll +++ test/CodeGen/SystemZ/swift-return.ll @@ -39,9 +39,8 @@ ; in memroy. The caller provides space for the return value and passes ; the address in %r2. The first input argument will be in %r3. ; CHECK-LABEL: test2: -; CHECK: lr %[[REG1:r[0-9]+]], %r2 +; CHECK: lr %r3, %r2 ; CHECK-DAG: la %r2, 160(%r15) -; CHECK-DAG: lr %r3, %[[REG1]] ; CHECK: brasl %r14, gen2 ; CHECK: l %r2, 160(%r15) ; CHECK: a %r2, 164(%r15) Index: test/CodeGen/SystemZ/swifterror.ll =================================================================== --- test/CodeGen/SystemZ/swifterror.ll +++ test/CodeGen/SystemZ/swifterror.ll @@ -34,11 +34,11 @@ ; CHECK: lgr %r[[REG1:[0-9]+]], %r2 ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo -; CHECK: cgijlh %r9, 0, +; CHECK: %r2, %r9 +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r[[REG2:[0-9]+]], 8(%r9) +; CHECK: lb %r[[REG2:[0-9]+]], 8(%r2) ; CHECK: stc %r[[REG2]], 0(%r[[REG1]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; CHECK-O0-LABEL: caller: ; CHECK-O0: lghi %r9, 0 @@ -246,11 +246,10 @@ ; CHECK: lhi %r3, 1 ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo_sret -; CHECK: cgijlh %r9, 0, +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r0, 8(%r9) +; CHECK: lb %r0, 8(%r2) ; CHECK: stc %r0, 0(%r[[REG1]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; CHECK-O0-LABEL: caller3: @@ -296,21 +295,21 @@ ; The first swifterror value: ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo -; CHECK: cgijlh %r9, 0, +; CHECK: ltgr %r2, %r9 +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r0, 8(%r9) +; CHECK: lb %r0, 8(%r2) ; CHECK: stc %r0, 0(%r[[REG1]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; The second swifterror value: ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo -; CHECK: cgijlh %r9, 0, +; CHECK: ltgr %r2, %r9 +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r0, 8(%r9) +; CHECK: lb %r0, 8(%r2) ; CHECK: stc %r0, 0(%r[[REG2]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; CHECK-O0-LABEL: caller_with_multiple_swifterror_values: Index: test/CodeGen/Thumb/long.ll =================================================================== --- test/CodeGen/Thumb/long.ll +++ test/CodeGen/Thumb/long.ll @@ -60,10 +60,10 @@ %tmp1 = add i64 %y, 10 ret i64 %tmp1 ; CHECK-LABEL: f6a: +; CHECK: movs r0, r2 ; CHECK: movs r1, #0 -; CHECK: adds r2, #10 +; CHECK: adds r0, #10 ; CHECK: adcs r1, r3 -; CHECK: movs r0, r2 } define i64 @f6b(i64 %x, i64 %y) { @@ -102,11 +102,11 @@ %tmp1 = sub i64 %y, 10 ret i64 %tmp1 ; CHECK-LABEL: f9a: -; CHECK: movs r0, #0 -; CHECK: subs r2, #10 -; CHECK: sbcs r3, r0 -; CHECK: movs r0, r2 ; CHECK: movs r1, r3 +; CHECK: movs r0, r2 +; CHECK: movs r2, #0 +; CHECK: subs r0, #10 +; CHECK: sbcs r1, r2 } define i64 @f9b(i64 %x, i64 %y) { ; ADDC with big negative imm => SUBS reg @@ -187,14 +187,14 @@ %tmp2 = add i64 
%tmp1, -1000 ret i64 %tmp2 ; CHECK-LABEL: f11: +; CHECK: movs r1, r3 ; CHECK: movs r0, #125 ; CHECK: lsls r0, r0, #3 -; CHECK: movs r1, #0 +; CHECK: movs r3, #0 ; CHECK: subs r2, r2, r0 -; CHECK: sbcs r3, r1 +; CHECK: sbcs r1, r3 ; CHECK: subs r0, r2, r0 -; CHECK: sbcs r3, r1 -; CHECK: movs r1, r3 +; CHECK: sbcs r1, r3 } ; "sub 2147483648" has to be lowered into "add -2147483648" Index: test/CodeGen/Thumb2/aapcs.ll =================================================================== --- test/CodeGen/Thumb2/aapcs.ll +++ test/CodeGen/Thumb2/aapcs.ll @@ -14,8 +14,8 @@ define double @double_in_reg(double %a, double %b) { entry: ; CHECK-LABEL: double_in_reg: -; SOFT: mov r0, r2 ; SOFT: mov r1, r3 +; SOFT: mov r0, r2 ; SP: vmov.f32 s0, s2 ; SP: vmov.f32 s1, s3 ; DP: vmov.f64 d0, d1 Index: test/CodeGen/Thumb2/thumb2-select_xform.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-select_xform.ll +++ test/CodeGen/Thumb2/thumb2-select_xform.ll @@ -2,11 +2,11 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: t1 -; CHECK: mvn r0, #-2147483648 +; CHECK: mov r0, r1 +; CHECK: mvn r1, #-2147483648 ; CHECK: cmp r2, #10 ; CHECK: it le -; CHECK: addle r1, r0 -; CHECK: mov r0, r1 +; CHECK: addle r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 %tmp3 = add i32 %tmp2, %b @@ -15,10 +15,10 @@ define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: t2 +; CHECK: mov r0, r1 ; CHECK: cmp r2, #10 ; CHECK: it le -; CHECK: addle.w r1, r1, #-2147483648 -; CHECK: mov r0, r1 +; CHECK: addle.w r0, r0, #-2147483648 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483648 @@ -28,10 +28,10 @@ define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t3 +; CHECK: mov r0, r1 ; CHECK: cmp r2, #10 ; CHECK: it le -; CHECK: suble r1, #10 -; CHECK: mov r0, r1 +; CHECK: suble r0, #10 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10 %tmp3 = sub i32 %b, %tmp2 Index: test/CodeGen/X86/GlobalISel/add-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/add-scalar.ll +++ test/CodeGen/X86/GlobalISel/add-scalar.ll @@ -63,8 +63,9 @@ define i8 @test_add_i8(i8 %arg1, i8 %arg2) { ; X64-LABEL: test_add_i8: ; X64: # BB#0: -; X64-NEXT: addb %dil, %sil ; X64-NEXT: movl %esi, %eax +; X64-NEXT: addb %dil, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ; ; X32-LABEL: test_add_i8: Index: test/CodeGen/X86/GlobalISel/and-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/and-scalar.ll +++ test/CodeGen/X86/GlobalISel/and-scalar.ll @@ -18,8 +18,9 @@ define i8 @test_and_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_and_i8: ; ALL: # BB#0: -; ALL-NEXT: andb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andb %dil, %al +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq %ret = and i8 %arg1, %arg2 ret i8 %ret @@ -28,8 +29,9 @@ define i16 @test_and_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_and_i16: ; ALL: # BB#0: -; ALL-NEXT: andw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andw %di, %ax +; ALL-NEXT: # kill: %AX %AX %EAX ; ALL-NEXT: retq %ret = and i16 %arg1, %arg2 ret i16 %ret @@ -38,8 +40,8 @@ define i32 @test_and_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_and_i32: ; ALL: # BB#0: -; ALL-NEXT: andl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andl %edi, %eax ; ALL-NEXT: retq %ret = and i32 %arg1, %arg2 ret i32 %ret @@ -48,8 +50,8 @@ define i64 @test_and_i64(i64 %arg1, i64 %arg2) { 
; ALL-LABEL: test_and_i64: ; ALL: # BB#0: -; ALL-NEXT: andq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: andq %rdi, %rax ; ALL-NEXT: retq %ret = and i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/GlobalISel/binop.ll =================================================================== --- test/CodeGen/X86/GlobalISel/binop.ll +++ test/CodeGen/X86/GlobalISel/binop.ll @@ -7,8 +7,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_sub_i64: ; ALL: # BB#0: -; ALL-NEXT: subq %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: subq %rsi, %rax ; ALL-NEXT: retq %ret = sub i64 %arg1, %arg2 ret i64 %ret @@ -17,8 +17,8 @@ define i32 @test_sub_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_sub_i32: ; ALL: # BB#0: -; ALL-NEXT: subl %esi, %edi ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: subl %esi, %eax ; ALL-NEXT: retq %ret = sub i32 %arg1, %arg2 ret i32 %ret Index: test/CodeGen/X86/GlobalISel/callingconv.ll =================================================================== --- test/CodeGen/X86/GlobalISel/callingconv.ll +++ test/CodeGen/X86/GlobalISel/callingconv.ll @@ -38,6 +38,7 @@ ; X64-LABEL: test_arg_i8: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ret i8 %a } @@ -51,6 +52,7 @@ ; X64-LABEL: test_arg_i16: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq ret i16 %a } @@ -114,8 +116,8 @@ ; X32: # BB#0: ; X32-NEXT: subl $12, %esp ; X32-NEXT: .cfi_def_cfa_offset 16 -; X32-NEXT: movups 16(%esp), %xmm1 -; X32-NEXT: movaps %xmm2, %xmm0 +; X32-DAG: movups 16(%esp), %xmm1 +; X32-DAG: movaps %xmm2, %xmm0 ; X32-NEXT: addl $12, %esp ; X32-NEXT: retl ; @@ -248,8 +250,8 @@ ; X32-NEXT: .cfi_def_cfa_offset 48 ; X32-NEXT: movaps %xmm0, (%esp) # 16-byte Spill ; X32-NEXT: movaps %xmm1, 16(%esp) # 16-byte Spill -; X32-NEXT: movdqu 48(%esp), %xmm1 -; X32-NEXT: movdqa %xmm2, %xmm0 +; X32-DAG: movdqu 48(%esp), %xmm1 +; X32-DAG: movdqa %xmm2, %xmm0 ; X32-NEXT: calll split_return_callee ; X32-NEXT: paddd (%esp), %xmm0 # 16-byte Folded Reload ; X32-NEXT: paddd 16(%esp), %xmm1 # 16-byte Folded Reload Index: test/CodeGen/X86/GlobalISel/ext-x86-64.ll =================================================================== --- test/CodeGen/X86/GlobalISel/ext-x86-64.ll +++ test/CodeGen/X86/GlobalISel/ext-x86-64.ll @@ -6,9 +6,8 @@ define i64 @test_zext_i1(i8 %a) { ; X64-LABEL: test_zext_i1: ; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: andq $1, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andq $1, %rax ; X64-NEXT: retq %val = trunc i8 %a to i1 %r = zext i1 %val to i64 Index: test/CodeGen/X86/GlobalISel/ext.ll =================================================================== --- test/CodeGen/X86/GlobalISel/ext.ll +++ test/CodeGen/X86/GlobalISel/ext.ll @@ -5,8 +5,9 @@ define i8 @test_zext_i1toi8(i32 %a) { ; X64-LABEL: test_zext_i1toi8: ; X64: # BB#0: -; X64-NEXT: andb $1, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1toi8: @@ -23,8 +24,9 @@ define i16 @test_zext_i1toi16(i32 %a) { ; X64-LABEL: test_zext_i1toi16: ; X64: # BB#0: -; X64-NEXT: andw $1, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andw $1, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1toi16: @@ -41,8 +43,8 @@ define i32 @test_zext_i1(i32 %a) { ; X64-LABEL: test_zext_i1: ; X64: # BB#0: -; X64-NEXT: andl $1, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax ; X64-NEXT: retq ; 
; X32-LABEL: test_zext_i1: Index: test/CodeGen/X86/GlobalISel/memop-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/memop-scalar.ll +++ test/CodeGen/X86/GlobalISel/memop-scalar.ll @@ -82,9 +82,9 @@ define i1 * @test_store_i1(i1 %val, i1 * %p1) { ; ALL-LABEL: test_store_i1: ; ALL: # BB#0: -; ALL-NEXT: andb $1, %dil -; ALL-NEXT: movb %dil, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: andb $1, %dil +; ALL-NEXT: movb %dil, (%rax) ; ALL-NEXT: retq store i1 %val, i1* %p1 ret i1 * %p1; @@ -93,8 +93,8 @@ define i32 * @test_store_i32(i32 %val, i32 * %p1) { ; ALL-LABEL: test_store_i32: ; ALL: # BB#0: -; ALL-NEXT: movl %edi, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: movl %edi, (%rax) ; ALL-NEXT: retq store i32 %val, i32* %p1 ret i32 * %p1; @@ -103,8 +103,8 @@ define i64 * @test_store_i64(i64 %val, i64 * %p1) { ; ALL-LABEL: test_store_i64: ; ALL: # BB#0: -; ALL-NEXT: movq %rdi, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: movq %rdi, (%rax) ; ALL-NEXT: retq store i64 %val, i64* %p1 ret i64 * %p1; @@ -114,15 +114,15 @@ ; ; SSE_FAST-LABEL: test_store_float: ; SSE_FAST: # BB#0: -; SSE_FAST-NEXT: movd %xmm0, %eax -; SSE_FAST-NEXT: movl %eax, (%rdi) ; SSE_FAST-NEXT: movq %rdi, %rax +; SSE_FAST-NEXT: movd %xmm0, %ecx +; SSE_FAST-NEXT: movl %ecx, (%rax) ; SSE_FAST-NEXT: retq ; ; SSE_GREEDY-LABEL: test_store_float: ; SSE_GREEDY: # BB#0: -; SSE_GREEDY-NEXT: movss %xmm0, (%rdi) ; SSE_GREEDY-NEXT: movq %rdi, %rax +; SSE_GREEDY-NEXT: movss %xmm0, (%rax) ; SSE_GREEDY-NEXT: retq store float %val, float* %p1 ret float * %p1; @@ -132,15 +132,15 @@ ; ; SSE_FAST-LABEL: test_store_double: ; SSE_FAST: # BB#0: -; SSE_FAST-NEXT: movq %xmm0, %rax -; SSE_FAST-NEXT: movq %rax, (%rdi) ; SSE_FAST-NEXT: movq %rdi, %rax +; SSE_FAST-NEXT: movq %xmm0, %rcx +; SSE_FAST-NEXT: movq %rcx, (%rax) ; SSE_FAST-NEXT: retq ; ; SSE_GREEDY-LABEL: test_store_double: ; SSE_GREEDY: # BB#0: -; SSE_GREEDY-NEXT: movsd %xmm0, (%rdi) ; SSE_GREEDY-NEXT: movq %rdi, %rax +; SSE_GREEDY-NEXT: movsd %xmm0, (%rax) ; SSE_GREEDY-NEXT: retq store double %val, double* %p1 ret double * %p1; Index: test/CodeGen/X86/GlobalISel/mul-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/mul-scalar.ll +++ test/CodeGen/X86/GlobalISel/mul-scalar.ll @@ -10,8 +10,9 @@ define i16 @test_mul_i16(i16 %arg1, i16 %arg2) { ; X64-LABEL: test_mul_i16: ; X64: # BB#0: -; X64-NEXT: imulw %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: imulw %di, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %ret = mul i16 %arg1, %arg2 ret i16 %ret @@ -20,8 +21,8 @@ define i32 @test_mul_i32(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_mul_i32: ; X64: # BB#0: -; X64-NEXT: imull %edi, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: imull %edi, %eax ; X64-NEXT: retq %ret = mul i32 %arg1, %arg2 ret i32 %ret @@ -30,8 +31,8 @@ define i64 @test_mul_i64(i64 %arg1, i64 %arg2) { ; X64-LABEL: test_mul_i64: ; X64: # BB#0: -; X64-NEXT: imulq %rdi, %rsi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: retq %ret = mul i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/GlobalISel/or-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/or-scalar.ll +++ test/CodeGen/X86/GlobalISel/or-scalar.ll @@ -18,8 +18,9 @@ define i8 @test_or_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_or_i8: ; ALL: # BB#0: -; ALL-NEXT: orb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orb %dil, %al +; ALL-NEXT: # kill: %AL %AL %EAX 
; ALL-NEXT: retq %ret = or i8 %arg1, %arg2 ret i8 %ret @@ -28,8 +29,9 @@ define i16 @test_or_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_or_i16: ; ALL: # BB#0: -; ALL-NEXT: orw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orw %di, %ax +; ALL-NEXT: # kill: %AX %AX %EAX ; ALL-NEXT: retq %ret = or i16 %arg1, %arg2 ret i16 %ret @@ -38,8 +40,8 @@ define i32 @test_or_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_or_i32: ; ALL: # BB#0: -; ALL-NEXT: orl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orl %edi, %eax ; ALL-NEXT: retq %ret = or i32 %arg1, %arg2 ret i32 %ret @@ -48,8 +50,8 @@ define i64 @test_or_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_or_i64: ; ALL: # BB#0: -; ALL-NEXT: orq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: orq %rdi, %rax ; ALL-NEXT: retq %ret = or i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/GlobalISel/phi.ll =================================================================== --- test/CodeGen/X86/GlobalISel/phi.ll +++ test/CodeGen/X86/GlobalISel/phi.ll @@ -4,15 +4,16 @@ define i8 @test_i8(i32 %a, i8 %f, i8 %t) { ; ALL-LABEL: test_i8: ; ALL: # BB#0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB0_2 ; ALL-NEXT: # BB#1: # %cond.false -; ALL-NEXT: movl %edx, %esi +; ALL-NEXT: movl %edx, %eax ; ALL-NEXT: .LBB0_2: # %cond.end -; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -32,15 +33,16 @@ define i16 @test_i16(i32 %a, i16 %f, i16 %t) { ; ALL-LABEL: test_i16: ; ALL: # BB#0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB1_2 ; ALL-NEXT: # BB#1: # %cond.false -; ALL-NEXT: movl %edx, %esi +; ALL-NEXT: movl %edx, %eax ; ALL-NEXT: .LBB1_2: # %cond.end -; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: # kill: %AX %AX %EAX ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -60,15 +62,15 @@ define i32 @test_i32(i32 %a, i32 %f, i32 %t) { ; ALL-LABEL: test_i32: ; ALL: # BB#0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB2_2 ; ALL-NEXT: # BB#1: # %cond.false -; ALL-NEXT: movl %edx, %esi +; ALL-NEXT: movl %edx, %eax ; ALL-NEXT: .LBB2_2: # %cond.end -; ALL-NEXT: movl %esi, %eax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -88,15 +90,15 @@ define i64 @test_i64(i32 %a, i64 %f, i64 %t) { ; ALL-LABEL: test_i64: ; ALL: # BB#0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB3_2 ; ALL-NEXT: # BB#1: # %cond.false -; ALL-NEXT: movq %rdx, %rsi +; ALL-NEXT: movq %rdx, %rax ; ALL-NEXT: .LBB3_2: # %cond.end -; ALL-NEXT: movq %rsi, %rax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 Index: test/CodeGen/X86/GlobalISel/sub-scalar.ll =================================================================== --- 
test/CodeGen/X86/GlobalISel/sub-scalar.ll +++ test/CodeGen/X86/GlobalISel/sub-scalar.ll @@ -4,8 +4,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; X64-LABEL: test_sub_i64: ; X64: # BB#0: -; X64-NEXT: subq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: subq %rsi, %rax ; X64-NEXT: retq %ret = sub i64 %arg1, %arg2 ret i64 %ret @@ -14,8 +14,8 @@ define i32 @test_sub_i32(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_sub_i32: ; X64: # BB#0: -; X64-NEXT: subl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax ; X64-NEXT: retq %ret = sub i32 %arg1, %arg2 ret i32 %ret @@ -24,8 +24,9 @@ define i16 @test_sub_i16(i16 %arg1, i16 %arg2) { ; X64-LABEL: test_sub_i16: ; X64: # BB#0: -; X64-NEXT: subw %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subw %si, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %ret = sub i16 %arg1, %arg2 ret i16 %ret @@ -34,8 +35,9 @@ define i8 @test_sub_i8(i8 %arg1, i8 %arg2) { ; X64-LABEL: test_sub_i8: ; X64: # BB#0: -; X64-NEXT: subb %sil, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subb %sil, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %ret = sub i8 %arg1, %arg2 ret i8 %ret @@ -44,9 +46,9 @@ define i32 @test_sub_i1(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_sub_i1: ; X64: # BB#0: -; X64-NEXT: subb %sil, %dil -; X64-NEXT: andl $1, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subb %sil, %al +; X64-NEXT: andl $1, %eax ; X64-NEXT: retq %a1 = trunc i32 %arg1 to i1 %a2 = trunc i32 %arg2 to i1 Index: test/CodeGen/X86/GlobalISel/trunc.ll =================================================================== --- test/CodeGen/X86/GlobalISel/trunc.ll +++ test/CodeGen/X86/GlobalISel/trunc.ll @@ -5,6 +5,7 @@ ; CHECK-LABEL: trunc_i32toi1: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %r = trunc i32 %a to i1 ret i1 %r @@ -14,6 +15,7 @@ ; CHECK-LABEL: trunc_i32toi8: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %r = trunc i32 %a to i8 ret i8 %r @@ -23,6 +25,7 @@ ; CHECK-LABEL: trunc_i32toi16: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq %r = trunc i32 %a to i16 ret i16 %r @@ -31,7 +34,8 @@ define i8 @trunc_i64toi8(i64 %a) { ; CHECK-LABEL: trunc_i64toi8: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %AL %AL %RAX ; CHECK-NEXT: retq %r = trunc i64 %a to i8 ret i8 %r @@ -40,7 +44,8 @@ define i16 @trunc_i64toi16(i64 %a) { ; CHECK-LABEL: trunc_i64toi16: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %AX %AX %RAX ; CHECK-NEXT: retq %r = trunc i64 %a to i16 ret i16 %r @@ -49,7 +54,8 @@ define i32 @trunc_i64toi32(i64 %a) { ; CHECK-LABEL: trunc_i64toi32: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: retq %r = trunc i64 %a to i32 ret i32 %r Index: test/CodeGen/X86/GlobalISel/undef.ll =================================================================== --- test/CodeGen/X86/GlobalISel/undef.ll +++ test/CodeGen/X86/GlobalISel/undef.ll @@ -11,8 +11,9 @@ define i8 @test2(i8 %a) { ; ALL-LABEL: test2: ; ALL: # BB#0: -; ALL-NEXT: addb %al, %dil ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: addb %al, %al +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq %r = add i8 %a, undef ret i8 %r Index: test/CodeGen/X86/GlobalISel/xor-scalar.ll =================================================================== --- 
test/CodeGen/X86/GlobalISel/xor-scalar.ll +++ test/CodeGen/X86/GlobalISel/xor-scalar.ll @@ -18,8 +18,9 @@ define i8 @test_xor_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_xor_i8: ; ALL: # BB#0: -; ALL-NEXT: xorb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorb %dil, %al +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq %ret = xor i8 %arg1, %arg2 ret i8 %ret @@ -28,8 +29,9 @@ define i16 @test_xor_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_xor_i16: ; ALL: # BB#0: -; ALL-NEXT: xorw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorw %di, %ax +; ALL-NEXT: # kill: %AX %AX %EAX ; ALL-NEXT: retq %ret = xor i16 %arg1, %arg2 ret i16 %ret @@ -38,8 +40,8 @@ define i32 @test_xor_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_xor_i32: ; ALL: # BB#0: -; ALL-NEXT: xorl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %edi, %eax ; ALL-NEXT: retq %ret = xor i32 %arg1, %arg2 ret i32 %ret @@ -48,8 +50,8 @@ define i64 @test_xor_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_xor_i64: ; ALL: # BB#0: -; ALL-NEXT: xorq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: xorq %rdi, %rax ; ALL-NEXT: retq %ret = xor i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/add.ll =================================================================== --- test/CodeGen/X86/add.ll +++ test/CodeGen/X86/add.ll @@ -16,14 +16,14 @@ ; ; X64-LINUX-LABEL: test1: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: subl $-128, %edi ; X64-LINUX-NEXT: movl %edi, %eax +; X64-LINUX-NEXT: subl $-128, %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test1: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: subl $-128, %ecx ; X64-WIN32-NEXT: movl %ecx, %eax +; X64-WIN32-NEXT: subl $-128, %eax ; X64-WIN32-NEXT: retq entry: %b = add i32 %a, 128 @@ -38,14 +38,14 @@ ; ; X64-LINUX-LABEL: test2: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: subq $-2147483648, %rdi # imm = 0x80000000 ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: subq $-2147483648, %rax # imm = 0x80000000 ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test2: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: subq $-2147483648, %rcx # imm = 0x80000000 ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: subq $-2147483648, %rax # imm = 0x80000000 ; X64-WIN32-NEXT: retq entry: %b = add i64 %a, 2147483648 @@ -60,14 +60,14 @@ ; ; X64-LINUX-LABEL: test3: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: subq $-128, %rdi ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: subq $-128, %rax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test3: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: subq $-128, %rcx ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: subq $-128, %rax ; X64-WIN32-NEXT: retq entry: %b = add i64 %a, 128 @@ -204,16 +204,16 @@ ; ; X64-LINUX-LABEL: test7: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: addl %esi, %edi -; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: movl %edi, %eax +; X64-LINUX-NEXT: addl %esi, %eax +; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test7: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: addl %edx, %ecx -; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: movl %ecx, %eax +; X64-WIN32-NEXT: addl %edx, %eax +; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: retq entry: %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) @@ -233,16 +233,16 @@ ; ; X64-LINUX-LABEL: test8: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: addq %rsi, %rdi -; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: addq %rsi, %rax +; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: 
test8: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: addq %rdx, %rcx -; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: addq %rdx, %rax +; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: retq entry: %extleft = zext i64 %left to i65 @@ -268,20 +268,20 @@ ; ; X64-LINUX-LABEL: test9: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: xorl %eax, %eax -; X64-LINUX-NEXT: cmpl $10, %edi -; X64-LINUX-NEXT: sete %al -; X64-LINUX-NEXT: subl %eax, %esi ; X64-LINUX-NEXT: movl %esi, %eax +; X64-LINUX-NEXT: xorl %ecx, %ecx +; X64-LINUX-NEXT: cmpl $10, %edi +; X64-LINUX-NEXT: sete %cl +; X64-LINUX-NEXT: subl %ecx, %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test9: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: xorl %eax, %eax -; X64-WIN32-NEXT: cmpl $10, %ecx -; X64-WIN32-NEXT: sete %al -; X64-WIN32-NEXT: subl %eax, %edx ; X64-WIN32-NEXT: movl %edx, %eax +; X64-WIN32-NEXT: xorl %edx, %edx +; X64-WIN32-NEXT: cmpl $10, %ecx +; X64-WIN32-NEXT: sete %dl +; X64-WIN32-NEXT: subl %edx, %eax ; X64-WIN32-NEXT: retq entry: %cmp = icmp eq i32 %x, 10 Index: test/CodeGen/X86/addcarry.ll =================================================================== --- test/CodeGen/X86/addcarry.ll +++ test/CodeGen/X86/addcarry.ll @@ -110,15 +110,15 @@ define %scalar @pr31719(%scalar* nocapture readonly %this, %scalar %arg.b) { ; CHECK-LABEL: pr31719: ; CHECK: # BB#0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: addq (%rsi), %rdx ; CHECK-NEXT: adcq 8(%rsi), %rcx ; CHECK-NEXT: adcq 16(%rsi), %r8 ; CHECK-NEXT: adcq 24(%rsi), %r9 -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %r8, 16(%rdi) -; CHECK-NEXT: movq %r9, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %rcx, 8(%rax) +; CHECK-NEXT: movq %r8, 16(%rax) +; CHECK-NEXT: movq %r9, 24(%rax) ; CHECK-NEXT: retq entry: %0 = extractvalue %scalar %arg.b, 0 @@ -206,9 +206,9 @@ define i64 @shiftadd(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: shiftadd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: adcq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: adcq %rcx, %rax ; CHECK-NEXT: retq entry: %0 = zext i64 %a to i128 @@ -226,23 +226,23 @@ define %S @readd(%S* nocapture readonly %this, %S %arg.b) { ; CHECK-LABEL: readd: ; CHECK: # BB#0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: movq 8(%rsi), %r10 -; CHECK-NEXT: adcq $0, %r10 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: addq %rcx, %r10 -; CHECK-NEXT: adcq 16(%rsi), %rax +; CHECK-NEXT: movq 8(%rsi), %r11 +; CHECK-NEXT: adcq $0, %r11 +; CHECK-NEXT: setb %r10b +; CHECK-NEXT: movzbl %r10b, %edi +; CHECK-NEXT: addq %rcx, %r11 +; CHECK-NEXT: adcq 16(%rsi), %rdi ; CHECK-NEXT: setb %cl ; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: addq %r8, %rdi ; CHECK-NEXT: adcq 24(%rsi), %rcx ; CHECK-NEXT: addq %r9, %rcx -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %r10, 8(%rdi) -; CHECK-NEXT: movq %rax, 16(%rdi) -; CHECK-NEXT: movq %rcx, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %r11, 8(%rax) +; CHECK-NEXT: movq %rdi, 16(%rax) +; CHECK-NEXT: movq %rcx, 24(%rax) ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 Index: test/CodeGen/X86/andimm8.ll =================================================================== --- test/CodeGen/X86/andimm8.ll +++ test/CodeGen/X86/andimm8.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s 
-mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s ; PR8365 -; CHECK: andl $-64, %edi # encoding: [0x83,0xe7,0xc0] +; CHECK: andl $-64, %eax # encoding: [0x83,0xe0,0xc0] define i64 @bra(i32 %zed) nounwind { %t1 = zext i32 %zed to i64 @@ -19,13 +19,13 @@ } define i64 @bar(i64 %zed) nounwind { -; CHECK: andl $42, %edi # encoding: [0x83,0xe7,0x2a] +; CHECK: andl $42, %eax # encoding: [0x83,0xe0,0x2a] %t1 = and i64 %zed, 42 ret i64 %t1 } define i64 @baz(i64 %zed) nounwind { -; CHECK: andl $2147483647, %edi # encoding: [0x81,0xe7,0xff,0xff,0xff,0x7f] +; CHECK: andl $2147483647, %eax # encoding: [0x25,0xff,0xff,0xff,0x7f] %t1 = and i64 %zed, 2147483647 ret i64 %t1 } Index: test/CodeGen/X86/anyext.ll =================================================================== --- test/CodeGen/X86/anyext.ll +++ test/CodeGen/X86/anyext.ll @@ -41,8 +41,9 @@ ; ; X64-LABEL: bar: ; X64: # BB#0: -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: divw %si ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: andl $1, %eax Index: test/CodeGen/X86/apm.ll =================================================================== --- test/CodeGen/X86/apm.ll +++ test/CodeGen/X86/apm.ll @@ -3,8 +3,8 @@ ; PR8573 ; CHECK-LABEL: foo: -; CHECK: leaq (%rdi), %rax -; CHECK-NEXT: movl %esi, %ecx +; CHECK: movl %esi, %ecx +; CHECK-NEXT: leaq (%rdi), %rax ; CHECK-NEXT: monitor ; WIN64-LABEL: foo: ; WIN64: leaq (%rcx), %rax @@ -20,8 +20,8 @@ declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind ; CHECK-LABEL: bar: -; CHECK: movl %edi, %ecx -; CHECK-NEXT: movl %esi, %eax +; CHECK: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: mwait ; WIN64-LABEL: bar: ; WIN64: movl %edx, %eax Index: test/CodeGen/X86/atomic-eflags-reuse.ll =================================================================== --- test/CodeGen/X86/atomic-eflags-reuse.ll +++ test/CodeGen/X86/atomic-eflags-reuse.ll @@ -5,16 +5,16 @@ define i32 @test_add_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_add_1_cmov_slt: ; FASTINCDEC: # BB#0: # %entry -; FASTINCDEC-NEXT: lock incq (%rdi) -; FASTINCDEC-NEXT: cmovgl %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock incq (%rdi) +; FASTINCDEC-NEXT: cmovgl %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_add_1_cmov_slt: ; SLOWINCDEC: # BB#0: # %entry -; SLOWINCDEC-NEXT: lock addq $1, (%rdi) -; SLOWINCDEC-NEXT: cmovgl %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $1, (%rdi) +; SLOWINCDEC-NEXT: cmovgl %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -26,16 +26,16 @@ define i32 @test_add_1_cmov_sge(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_add_1_cmov_sge: ; FASTINCDEC: # BB#0: # %entry -; FASTINCDEC-NEXT: lock incq (%rdi) -; FASTINCDEC-NEXT: cmovlel %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock incq (%rdi) +; FASTINCDEC-NEXT: cmovlel %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_add_1_cmov_sge: ; SLOWINCDEC: # BB#0: # %entry -; SLOWINCDEC-NEXT: lock addq $1, (%rdi) -; SLOWINCDEC-NEXT: cmovlel %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $1, (%rdi) +; SLOWINCDEC-NEXT: cmovlel %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -47,16 +47,16 @@ define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_sub_1_cmov_sle: ; FASTINCDEC: # BB#0: # %entry -; 
FASTINCDEC-NEXT: lock decq (%rdi) -; FASTINCDEC-NEXT: cmovgel %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock decq (%rdi) +; FASTINCDEC-NEXT: cmovgel %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_sub_1_cmov_sle: ; SLOWINCDEC: # BB#0: # %entry -; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) -; SLOWINCDEC-NEXT: cmovgel %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) +; SLOWINCDEC-NEXT: cmovgel %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst @@ -68,16 +68,16 @@ define i32 @test_sub_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_sub_1_cmov_sgt: ; FASTINCDEC: # BB#0: # %entry -; FASTINCDEC-NEXT: lock decq (%rdi) -; FASTINCDEC-NEXT: cmovll %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock decq (%rdi) +; FASTINCDEC-NEXT: cmovll %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_sub_1_cmov_sgt: ; SLOWINCDEC: # BB#0: # %entry -; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) -; SLOWINCDEC-NEXT: cmovll %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) +; SLOWINCDEC-NEXT: cmovll %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst @@ -159,11 +159,11 @@ define i32 @test_add_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_1_cmov_sle: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: cmovgl %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: lock xaddq %rcx, (%rdi) +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: cmovgl %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -175,11 +175,11 @@ define i32 @test_add_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_1_cmov_sgt: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: cmovlel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: lock xaddq %rcx, (%rdi) +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: cmovlel %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -228,7 +228,19 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: movb $12, %al +; CHECK-NEXT: js .LBB11_2 +; CHECK-NEXT: # BB#1: # %entry +; CHECK-NEXT: movb $34, %al +; CHECK-NEXT: .LBB11_2: # %entry +; CHECK-NEXT: movb %al, (%rsi) +; CHECK-NEXT: movb $56, %al +; CHECK-NEXT: js .LBB11_4 +; CHECK-NEXT: # BB#3: # %entry +; CHECK-NEXT: movb $78, %al +; CHECK-NEXT: .LBB11_4: # %entry +; CHECK-NEXT: retq entry: %add = atomicrmw add i64* %p, i64 1 seq_cst %cmp = icmp slt i64 %add, 0 Index: test/CodeGen/X86/atomic128.ll =================================================================== --- test/CodeGen/X86/atomic128.ll +++ test/CodeGen/X86/atomic128.ll @@ -12,10 +12,9 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rcx, %r9 +; CHECK-NEXT: movq %rcx, %rbx ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: movq %r9, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq Index: test/CodeGen/X86/avg.ll =================================================================== --- test/CodeGen/X86/avg.ll 
+++ test/CodeGen/X86/avg.ll @@ -270,180 +270,180 @@ ; SSE2: # BB#0: ; SSE2-NEXT: movdqa (%rdi), %xmm6 ; SSE2-NEXT: movdqa 16(%rdi), %xmm2 -; SSE2-NEXT: movdqa 32(%rdi), %xmm1 -; SSE2-NEXT: movdqa 48(%rdi), %xmm0 -; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa (%rsi), %xmm5 -; SSE2-NEXT: movdqa 16(%rsi), %xmm13 +; SSE2-NEXT: movdqa 32(%rdi), %xmm8 +; SSE2-NEXT: movdqa 48(%rdi), %xmm13 +; SSE2-NEXT: movdqa (%rsi), %xmm9 +; SSE2-NEXT: movdqa 16(%rsi), %xmm10 ; SSE2-NEXT: movdqa 32(%rsi), %xmm11 -; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: movdqa %xmm6, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15] ; SSE2-NEXT: movdqa %xmm4, %xmm7 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm1[4],xmm7[5],xmm1[5],xmm7[6],xmm1[6],xmm7[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1],xmm6[2],xmm1[2],xmm6[3],xmm1[3],xmm6[4],xmm1[4],xmm6[5],xmm1[5],xmm6[6],xmm1[6],xmm6[7],xmm1[7] ; SSE2-NEXT: movdqa %xmm6, %xmm12 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm12 = xmm12[4],xmm0[4],xmm12[5],xmm0[5],xmm12[6],xmm0[6],xmm12[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm12 = xmm12[4],xmm1[4],xmm12[5],xmm1[5],xmm12[6],xmm1[6],xmm12[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1],xmm6[2],xmm1[2],xmm6[3],xmm1[3] ; SSE2-NEXT: movdqa %xmm2, %xmm15 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm15 = xmm15[8],xmm0[8],xmm15[9],xmm0[9],xmm15[10],xmm0[10],xmm15[11],xmm0[11],xmm15[12],xmm0[12],xmm15[13],xmm0[13],xmm15[14],xmm0[14],xmm15[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm15 = xmm15[8],xmm1[8],xmm15[9],xmm1[9],xmm15[10],xmm1[10],xmm15[11],xmm1[11],xmm15[12],xmm1[12],xmm15[13],xmm1[13],xmm15[14],xmm1[14],xmm15[15],xmm1[15] ; SSE2-NEXT: movdqa %xmm15, %xmm14 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm14 = xmm14[4],xmm0[4],xmm14[5],xmm0[5],xmm14[6],xmm0[6],xmm14[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm2, %xmm8 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm8 = xmm8[4],xmm0[4],xmm8[5],xmm0[5],xmm8[6],xmm0[6],xmm8[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: movdqa %xmm5, %xmm10 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm10 = xmm10[8],xmm0[8],xmm10[9],xmm0[9],xmm10[10],xmm0[10],xmm10[11],xmm0[11],xmm10[12],xmm0[12],xmm10[13],xmm0[13],xmm10[14],xmm0[14],xmm10[15],xmm0[15] -; SSE2-NEXT: movdqa 
%xmm10, %xmm3 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: paddd %xmm7, %xmm3 -; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm1, %xmm7 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm0[0],xmm10[1],xmm0[1],xmm10[2],xmm0[2],xmm10[3],xmm0[3] -; SSE2-NEXT: paddd %xmm4, %xmm10 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm5, %xmm3 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: paddd %xmm12, %xmm3 -; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3] -; SSE2-NEXT: paddd %xmm6, %xmm5 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm14 = xmm14[4],xmm1[4],xmm14[5],xmm1[5],xmm14[6],xmm1[6],xmm14[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm15 = xmm15[0],xmm1[0],xmm15[1],xmm1[1],xmm15[2],xmm1[2],xmm15[3],xmm1[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: movdqa %xmm9, %xmm3 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] +; SSE2-NEXT: movdqa %xmm3, %xmm5 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] +; SSE2-NEXT: paddd %xmm7, %xmm5 ; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm13, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15] +; SSE2-NEXT: movdqa %xmm8, %xmm7 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm1[8],xmm7[9],xmm1[9],xmm7[10],xmm1[10],xmm7[11],xmm1[11],xmm7[12],xmm1[12],xmm7[13],xmm1[13],xmm7[14],xmm1[14],xmm7[15],xmm1[15] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-NEXT: paddd %xmm4, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm9 = xmm9[0],xmm1[0],xmm9[1],xmm1[1],xmm9[2],xmm1[2],xmm9[3],xmm1[3],xmm9[4],xmm1[4],xmm9[5],xmm1[5],xmm9[6],xmm1[6],xmm9[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm9, %xmm4 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] +; SSE2-NEXT: paddd %xmm12, %xmm4 +; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: punpcklwd {{.*#+}} xmm9 = xmm9[0],xmm1[0],xmm9[1],xmm1[1],xmm9[2],xmm1[2],xmm9[3],xmm1[3] +; SSE2-NEXT: paddd %xmm6, %xmm9 +; SSE2-NEXT: movdqa %xmm9, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm10, %xmm4 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = 
xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15] ; SSE2-NEXT: movdqa %xmm4, %xmm12 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm12 = xmm12[4],xmm0[4],xmm12[5],xmm0[5],xmm12[6],xmm0[6],xmm12[7],xmm0[7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm12 = xmm12[4],xmm1[4],xmm12[5],xmm1[5],xmm12[6],xmm1[6],xmm12[7],xmm1[7] ; SSE2-NEXT: paddd %xmm14, %xmm12 ; SSE2-NEXT: movdqa %xmm7, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm8 = xmm8[0],xmm1[0],xmm8[1],xmm1[1],xmm8[2],xmm1[2],xmm8[3],xmm1[3],xmm8[4],xmm1[4],xmm8[5],xmm1[5],xmm8[6],xmm1[6],xmm8[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] ; SSE2-NEXT: paddd %xmm15, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm13 = xmm13[0],xmm0[0],xmm13[1],xmm0[1],xmm13[2],xmm0[2],xmm13[3],xmm0[3],xmm13[4],xmm0[4],xmm13[5],xmm0[5],xmm13[6],xmm0[6],xmm13[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm13, %xmm15 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm15 = xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] -; SSE2-NEXT: paddd %xmm8, %xmm15 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm13 = xmm13[0],xmm0[0],xmm13[1],xmm0[1],xmm13[2],xmm0[2],xmm13[3],xmm0[3] -; SSE2-NEXT: paddd %xmm2, %xmm13 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm10 = xmm10[0],xmm1[0],xmm10[1],xmm1[1],xmm10[2],xmm1[2],xmm10[3],xmm1[3],xmm10[4],xmm1[4],xmm10[5],xmm1[5],xmm10[6],xmm1[6],xmm10[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm10, %xmm15 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm15 = xmm15[4],xmm1[4],xmm15[5],xmm1[5],xmm15[6],xmm1[6],xmm15[7],xmm1[7] +; SSE2-NEXT: paddd %xmm0, %xmm15 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm1[0],xmm10[1],xmm1[1],xmm10[2],xmm1[2],xmm10[3],xmm1[3] +; SSE2-NEXT: paddd %xmm2, %xmm10 +; SSE2-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm11, %xmm6 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15] -; SSE2-NEXT: movdqa %xmm6, %xmm9 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm9 = xmm9[4],xmm0[4],xmm9[5],xmm0[5],xmm9[6],xmm0[6],xmm9[7],xmm0[7] -; SSE2-NEXT: paddd %xmm5, %xmm9 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm1[8],xmm6[9],xmm1[9],xmm6[10],xmm1[10],xmm6[11],xmm1[11],xmm6[12],xmm1[12],xmm6[13],xmm1[13],xmm6[14],xmm1[14],xmm6[15],xmm1[15] +; SSE2-NEXT: movdqa %xmm6, %xmm10 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm10 = 
xmm10[4],xmm1[4],xmm10[5],xmm1[5],xmm10[6],xmm1[6],xmm10[7],xmm1[7] +; SSE2-NEXT: paddd %xmm5, %xmm10 +; SSE2-NEXT: movdqa %xmm8, %xmm2 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm1[0],xmm8[1],xmm1[1],xmm8[2],xmm1[2],xmm8[3],xmm1[3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1],xmm6[2],xmm1[2],xmm6[3],xmm1[3] ; SSE2-NEXT: paddd %xmm7, %xmm6 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm0[0],xmm11[1],xmm0[1],xmm11[2],xmm0[2],xmm11[3],xmm0[3],xmm11[4],xmm0[4],xmm11[5],xmm0[5],xmm11[6],xmm0[6],xmm11[7],xmm0[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm1[0],xmm11[1],xmm1[1],xmm11[2],xmm1[2],xmm11[3],xmm1[3],xmm11[4],xmm1[4],xmm11[5],xmm1[5],xmm11[6],xmm1[6],xmm11[7],xmm1[7] ; SSE2-NEXT: movdqa %xmm11, %xmm14 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm14 = xmm14[4],xmm0[4],xmm14[5],xmm0[5],xmm14[6],xmm0[6],xmm14[7],xmm0[7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm14 = xmm14[4],xmm1[4],xmm14[5],xmm1[5],xmm14[6],xmm1[6],xmm14[7],xmm1[7] ; SSE2-NEXT: paddd %xmm2, %xmm14 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload -; SSE2-NEXT: movdqa %xmm5, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm11 = xmm11[0],xmm0[0],xmm11[1],xmm0[1],xmm11[2],xmm0[2],xmm11[3],xmm0[3] -; SSE2-NEXT: paddd %xmm1, %xmm11 -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: movdqa %xmm13, %xmm0 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm11 = xmm11[0],xmm1[0],xmm11[1],xmm1[1],xmm11[2],xmm1[2],xmm11[3],xmm1[3] +; SSE2-NEXT: paddd %xmm8, %xmm11 +; SSE2-NEXT: movdqa %xmm0, %xmm5 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] ; SSE2-NEXT: movdqa 48(%rsi), %xmm7 -; SSE2-NEXT: movdqa %xmm7, %xmm3 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15] -; SSE2-NEXT: movdqa %xmm3, %xmm8 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm8 = xmm8[4],xmm0[4],xmm8[5],xmm0[5],xmm8[6],xmm0[6],xmm8[7],xmm0[7] -; SSE2-NEXT: paddd %xmm1, %xmm8 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE2-NEXT: paddd %xmm2, %xmm3 -; SSE2-NEXT: movdqa %xmm5, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3],xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm7, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] -; SSE2-NEXT: paddd %xmm1, %xmm5 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3] -; SSE2-NEXT: paddd %xmm2, %xmm7 +; SSE2-NEXT: movdqa %xmm7, %xmm2 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; SSE2-NEXT: movdqa %xmm2, %xmm9 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm9 = xmm9[4],xmm1[4],xmm9[5],xmm1[5],xmm9[6],xmm1[6],xmm9[7],xmm1[7] +; SSE2-NEXT: paddd %xmm5, %xmm9 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: paddd %xmm0, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm13 = xmm13[0],xmm1[0],xmm13[1],xmm1[1],xmm13[2],xmm1[2],xmm13[3],xmm1[3],xmm13[4],xmm1[4],xmm13[5],xmm1[5],xmm13[6],xmm1[6],xmm13[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm13, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3],xmm7[4],xmm1[4],xmm7[5],xmm1[5],xmm7[6],xmm1[6],xmm7[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm7, %xmm8 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm8 = xmm8[4],xmm1[4],xmm8[5],xmm1[5],xmm8[6],xmm1[6],xmm8[7],xmm1[7] +; SSE2-NEXT: paddd %xmm0, %xmm8 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm13 = xmm13[0],xmm1[0],xmm13[1],xmm1[1],xmm13[2],xmm1[2],xmm13[3],xmm1[3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3] +; SSE2-NEXT: paddd %xmm13, %xmm7 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload ; SSE2-NEXT: psubd %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: psubd %xmm0, %xmm10 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload -; SSE2-NEXT: psubd %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload -; SSE2-NEXT: psubd %xmm0, %xmm2 +; SSE2-NEXT: psubd %xmm0, %xmm3 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload +; SSE2-NEXT: psubd %xmm0, %xmm13 +; SSE2-NEXT: movdqa %xmm13, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload +; SSE2-NEXT: psubd %xmm0, %xmm5 +; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: psubd %xmm0, %xmm12 ; SSE2-NEXT: psubd %xmm0, %xmm4 ; SSE2-NEXT: psubd %xmm0, %xmm15 -; SSE2-NEXT: psubd %xmm0, %xmm13 -; SSE2-NEXT: psubd %xmm0, %xmm9 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload +; SSE2-NEXT: psubd %xmm0, %xmm5 +; SSE2-NEXT: psubd %xmm0, %xmm10 ; SSE2-NEXT: psubd %xmm0, %xmm6 ; SSE2-NEXT: psubd %xmm0, %xmm14 ; SSE2-NEXT: psubd %xmm0, %xmm11 +; SSE2-NEXT: psubd %xmm0, %xmm9 +; SSE2-NEXT: psubd %xmm0, %xmm2 ; SSE2-NEXT: psubd %xmm0, %xmm8 -; SSE2-NEXT: psubd %xmm0, %xmm3 -; SSE2-NEXT: psubd %xmm0, %xmm5 ; SSE2-NEXT: psubd %xmm0, %xmm7 -; SSE2-NEXT: psrld $1, %xmm10 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: psrld $1, %xmm3 ; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm10 -; SSE2-NEXT: packuswb %xmm1, %xmm10 -; SSE2-NEXT: psrld $1, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm13 = 
[255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE2-NEXT: pand %xmm13, %xmm1 +; SSE2-NEXT: pand %xmm13, %xmm3 +; SSE2-NEXT: packuswb %xmm1, %xmm3 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: psrld $1, %xmm0 ; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload ; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm2 -; SSE2-NEXT: packuswb %xmm1, %xmm2 -; SSE2-NEXT: packuswb %xmm10, %xmm2 -; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm13, %xmm1 +; SSE2-NEXT: pand %xmm13, %xmm0 +; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: packuswb %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrld $1, %xmm4 ; SSE2-NEXT: psrld $1, %xmm12 -; SSE2-NEXT: pand %xmm0, %xmm12 -; SSE2-NEXT: pand %xmm0, %xmm4 +; SSE2-NEXT: pand %xmm13, %xmm12 +; SSE2-NEXT: pand %xmm13, %xmm4 ; SSE2-NEXT: packuswb %xmm12, %xmm4 -; SSE2-NEXT: psrld $1, %xmm13 +; SSE2-NEXT: movdqa %xmm5, %xmm0 +; SSE2-NEXT: psrld $1, %xmm0 ; SSE2-NEXT: psrld $1, %xmm15 -; SSE2-NEXT: pand %xmm0, %xmm15 -; SSE2-NEXT: pand %xmm0, %xmm13 -; SSE2-NEXT: packuswb %xmm15, %xmm13 -; SSE2-NEXT: packuswb %xmm4, %xmm13 +; SSE2-NEXT: pand %xmm13, %xmm15 +; SSE2-NEXT: pand %xmm13, %xmm0 +; SSE2-NEXT: packuswb %xmm15, %xmm0 +; SSE2-NEXT: packuswb %xmm4, %xmm0 ; SSE2-NEXT: psrld $1, %xmm6 -; SSE2-NEXT: psrld $1, %xmm9 -; SSE2-NEXT: pand %xmm0, %xmm9 -; SSE2-NEXT: pand %xmm0, %xmm6 -; SSE2-NEXT: packuswb %xmm9, %xmm6 +; SSE2-NEXT: psrld $1, %xmm10 +; SSE2-NEXT: pand %xmm13, %xmm10 +; SSE2-NEXT: pand %xmm13, %xmm6 +; SSE2-NEXT: packuswb %xmm10, %xmm6 ; SSE2-NEXT: psrld $1, %xmm11 ; SSE2-NEXT: psrld $1, %xmm14 -; SSE2-NEXT: pand %xmm0, %xmm14 -; SSE2-NEXT: pand %xmm0, %xmm11 +; SSE2-NEXT: pand %xmm13, %xmm14 +; SSE2-NEXT: pand %xmm13, %xmm11 ; SSE2-NEXT: packuswb %xmm14, %xmm11 ; SSE2-NEXT: packuswb %xmm6, %xmm11 -; SSE2-NEXT: psrld $1, %xmm3 -; SSE2-NEXT: psrld $1, %xmm8 -; SSE2-NEXT: pand %xmm0, %xmm8 -; SSE2-NEXT: pand %xmm0, %xmm3 -; SSE2-NEXT: packuswb %xmm8, %xmm3 +; SSE2-NEXT: psrld $1, %xmm2 +; SSE2-NEXT: psrld $1, %xmm9 +; SSE2-NEXT: pand %xmm13, %xmm9 +; SSE2-NEXT: pand %xmm13, %xmm2 +; SSE2-NEXT: packuswb %xmm9, %xmm2 ; SSE2-NEXT: psrld $1, %xmm7 -; SSE2-NEXT: psrld $1, %xmm5 -; SSE2-NEXT: pand %xmm0, %xmm5 -; SSE2-NEXT: pand %xmm0, %xmm7 -; SSE2-NEXT: packuswb %xmm5, %xmm7 -; SSE2-NEXT: packuswb %xmm3, %xmm7 +; SSE2-NEXT: psrld $1, %xmm8 +; SSE2-NEXT: pand %xmm13, %xmm8 +; SSE2-NEXT: pand %xmm13, %xmm7 +; SSE2-NEXT: packuswb %xmm8, %xmm7 +; SSE2-NEXT: packuswb %xmm2, %xmm7 ; SSE2-NEXT: movdqu %xmm7, (%rax) ; SSE2-NEXT: movdqu %xmm11, (%rax) -; SSE2-NEXT: movdqu %xmm13, (%rax) +; SSE2-NEXT: movdqu %xmm0, (%rax) ; SSE2-NEXT: movdqu %xmm1, (%rax) ; SSE2-NEXT: retq ; @@ -1406,8 +1406,9 @@ ; SSE2-NEXT: punpckhwd {{.*#+}} xmm11 = xmm11[4],xmm0[4],xmm11[5],xmm0[5],xmm11[6],xmm0[6],xmm11[7],xmm0[7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm2, %xmm10 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm10 = xmm10[4],xmm0[4],xmm10[5],xmm0[5],xmm10[6],xmm0[6],xmm10[7],xmm0[7] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE2-NEXT: movdqa %xmm3, %xmm10 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] ; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15] @@ -1449,12 +1450,13 @@ ; SSE2-NEXT: psubd %xmm0, %xmm11 ; SSE2-NEXT: psubd %xmm0, %xmm5 ; SSE2-NEXT: psubd %xmm0, %xmm10 +; SSE2-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: psubd %xmm0, %xmm2 ; SSE2-NEXT: psubd %xmm0, %xmm3 -; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: psubd %xmm0, %xmm4 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload -; SSE2-NEXT: psubd %xmm0, %xmm3 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload +; SSE2-NEXT: psubd %xmm0, %xmm10 +; SSE2-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: psubd %xmm0, %xmm1 ; SSE2-NEXT: psrld $1, %xmm7 ; SSE2-NEXT: psrld $1, %xmm15 @@ -1485,19 +1487,19 @@ ; SSE2-NEXT: pand %xmm0, %xmm5 ; SSE2-NEXT: packuswb %xmm11, %xmm5 ; SSE2-NEXT: psrld $1, %xmm2 -; SSE2-NEXT: psrld $1, %xmm10 -; SSE2-NEXT: pand %xmm0, %xmm10 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload +; SSE2-NEXT: psrld $1, %xmm6 +; SSE2-NEXT: pand %xmm0, %xmm6 ; SSE2-NEXT: pand %xmm0, %xmm2 -; SSE2-NEXT: packuswb %xmm10, %xmm2 +; SSE2-NEXT: packuswb %xmm6, %xmm2 ; SSE2-NEXT: packuswb %xmm5, %xmm2 ; SSE2-NEXT: psrld $1, %xmm4 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload -; SSE2-NEXT: psrld $1, %xmm5 -; SSE2-NEXT: pand %xmm0, %xmm5 +; SSE2-NEXT: psrld $1, %xmm3 +; SSE2-NEXT: pand %xmm0, %xmm3 ; SSE2-NEXT: pand %xmm0, %xmm4 -; SSE2-NEXT: packuswb %xmm5, %xmm4 +; SSE2-NEXT: packuswb %xmm3, %xmm4 ; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: movdqa %xmm3, %xmm5 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload ; SSE2-NEXT: psrld $1, %xmm5 ; SSE2-NEXT: pand %xmm0, %xmm5 ; SSE2-NEXT: pand %xmm0, %xmm1 Index: test/CodeGen/X86/avx-intel-ocl.ll =================================================================== --- test/CodeGen/X86/avx-intel-ocl.ll +++ test/CodeGen/X86/avx-intel-ocl.ll @@ -122,8 +122,8 @@ ; pass parameters in registers for 64-bit platform ; X64-LABEL: test_int -; X64: leal {{.*}}, %edi ; X64: movl {{.*}}, %esi +; X64: leal {{.*}}, %edi ; X64: call ; X64: addl {{.*}}, %eax define i32 @test_int(i32 %a, i32 %b) nounwind { Index: test/CodeGen/X86/avx-vinsertf128.ll =================================================================== --- test/CodeGen/X86/avx-vinsertf128.ll +++ test/CodeGen/X86/avx-vinsertf128.ll @@ -75,8 +75,7 @@ define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: insert_undef_pd: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0) ret <4 x double> %res @@ -86,8 +85,7 @@ define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: insert_undef_ps: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0) ret <8 x float> %res @@ -97,8 +95,7 @@ define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: insert_undef_si: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1 -; 
CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0) ret <8 x i32> %res Index: test/CodeGen/X86/avx512-arith.ll =================================================================== --- test/CodeGen/X86/avx512-arith.ll +++ test/CodeGen/X86/avx512-arith.ll @@ -930,10 +930,10 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, ; CHECK-LABEL: test_mask_broadcast_vaddpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1 -; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} -; CHECK-NEXT: vmovapd %zmm1, %zmm0 +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm2, %k1 +; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} ; CHECK-NEXT: retq double* %j, <8 x i64> %mask1) nounwind { %mask = icmp ne <8 x i64> %mask1, zeroinitializer Index: test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- test/CodeGen/X86/avx512-calling-conv.ll +++ test/CodeGen/X86/avx512-calling-conv.ll @@ -358,9 +358,9 @@ define i32 @test10(i32 %a, i32 %b, i1 %cond) { ; ALL_X64-LABEL: test10: ; ALL_X64: ## BB#0: -; ALL_X64-NEXT: testb $1, %dl -; ALL_X64-NEXT: cmovel %esi, %edi ; ALL_X64-NEXT: movl %edi, %eax +; ALL_X64-NEXT: testb $1, %dl +; ALL_X64-NEXT: cmovel %esi, %eax ; ALL_X64-NEXT: retq ; ; KNL_X32-LABEL: test10: Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -197,25 +197,25 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) { ; KNL-LABEL: test12: ; KNL: ## BB#0: +; KNL-NEXT: movq %rdi, %rax ; KNL-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: testb $1, %al -; KNL-NEXT: cmoveq %rsi, %rdi -; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: testb $1, %cl +; KNL-NEXT: cmoveq %rsi, %rax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test12: ; SKX: ## BB#0: +; SKX-NEXT: movq %rdi, %rax ; SKX-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 ; SKX-NEXT: kshiftlb $7, %k0, %k0 ; SKX-NEXT: kshiftrb $7, %k0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: testb $1, %al -; SKX-NEXT: cmoveq %rsi, %rdi -; SKX-NEXT: movq %rdi, %rax +; SKX-NEXT: kmovd %k0, %ecx +; SKX-NEXT: testb $1, %cl +; SKX-NEXT: cmoveq %rsi, %rax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %cmpvector_func.i = icmp slt <16 x i64> %a, %b @@ -263,25 +263,25 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) { ; KNL-LABEL: test14: ; KNL: ## BB#0: +; KNL-NEXT: movq %rdi, %rax ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ; KNL-NEXT: kshiftlw $11, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: testb $1, %al -; KNL-NEXT: cmoveq %rsi, %rdi -; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: testb $1, %cl +; KNL-NEXT: cmoveq %rsi, %rax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test14: ; SKX: ## BB#0: +; SKX-NEXT: movq %rdi, %rax ; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ; SKX-NEXT: kshiftlb $3, %k0, %k0 ; SKX-NEXT: kshiftrb $7, %k0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: testb $1, %al -; SKX-NEXT: cmoveq %rsi, %rdi -; SKX-NEXT: movq %rdi, %rax +; SKX-NEXT: kmovd %k0, %ecx +; 
SKX-NEXT: testb $1, %cl +; SKX-NEXT: cmoveq %rsi, %rax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %cmpvector_func.i = icmp slt <8 x i64> %a, %b Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -212,10 +212,11 @@ ; CHECK-LABEL: mand16: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorl %esi, %ecx +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> Index: test/CodeGen/X86/avx512-regcall-NoMask.ll =================================================================== --- test/CodeGen/X86/avx512-regcall-NoMask.ll +++ test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -9,19 +9,19 @@ ; X32: # BB#0: ; X32-NEXT: incb %al ; X32-NEXT: # kill: %AL %AL %EAX -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argReti1: ; WIN64: # BB#0: ; WIN64-NEXT: incb %al ; WIN64-NEXT: # kill: %AL %AL %EAX -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argReti1: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: incb %al ; LINUXOSX64-NEXT: # kill: %AL %AL %EAX -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %add = add i1 %a, 1 ret i1 %add } @@ -36,7 +36,7 @@ ; X32-NEXT: calll _test_argReti1 ; X32-NEXT: incb %al ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargReti1: ; WIN64: # BB#0: @@ -48,7 +48,7 @@ ; WIN64-NEXT: callq test_argReti1 ; WIN64-NEXT: incb %al ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -63,7 +63,7 @@ ; LINUXOSX64-NEXT: callq test_argReti1 ; LINUXOSX64-NEXT: incb %al ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = add i1 %a, 1 %c = call x86_regcallcc i1 @test_argReti1(i1 %b) %d = add i1 %c, 1 @@ -76,19 +76,19 @@ ; X32: # BB#0: ; X32-NEXT: incb %al ; X32-NEXT: # kill: %AL %AL %EAX -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argReti8: ; WIN64: # BB#0: ; WIN64-NEXT: incb %al ; WIN64-NEXT: # kill: %AL %AL %EAX -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argReti8: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: incb %al ; LINUXOSX64-NEXT: # kill: %AL %AL %EAX -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %add = add i8 %a, 1 ret i8 %add } @@ -103,7 +103,7 @@ ; X32-NEXT: calll _test_argReti8 ; X32-NEXT: incb %al ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargReti8: ; WIN64: # BB#0: @@ -115,7 +115,7 @@ ; WIN64-NEXT: callq test_argReti8 ; WIN64-NEXT: incb %al ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -130,7 +130,7 @@ ; LINUXOSX64-NEXT: callq test_argReti8 ; LINUXOSX64-NEXT: incb %al ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = add i8 %a, 1 %c = call x86_regcallcc i8 @test_argReti8(i8 %b) %d = add i8 %c, 1 @@ -143,19 +143,19 @@ ; X32: # BB#0: ; X32-NEXT: incl %eax ; X32-NEXT: # kill: %AX %AX %EAX -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argReti16: ; WIN64: # BB#0: ; 
WIN64-NEXT: incl %eax ; WIN64-NEXT: # kill: %AX %AX %EAX -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argReti16: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: incl %eax ; LINUXOSX64-NEXT: # kill: %AX %AX %EAX -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %add = add i16 %a, 1 ret i16 %add } @@ -171,7 +171,7 @@ ; X32-NEXT: incl %eax ; X32-NEXT: # kill: %AX %AX %EAX ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargReti16: ; WIN64: # BB#0: @@ -184,7 +184,7 @@ ; WIN64-NEXT: incl %eax ; WIN64-NEXT: # kill: %AX %AX %EAX ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -200,7 +200,7 @@ ; LINUXOSX64-NEXT: incl %eax ; LINUXOSX64-NEXT: # kill: %AX %AX %EAX ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = add i16 %a, 1 %c = call x86_regcallcc i16 @test_argReti16(i16 %b) %d = add i16 %c, 1 @@ -212,17 +212,17 @@ ; X32-LABEL: test_argReti32: ; X32: # BB#0: ; X32-NEXT: incl %eax -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argReti32: ; WIN64: # BB#0: ; WIN64-NEXT: incl %eax -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argReti32: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: incl %eax -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %add = add i32 %a, 1 ret i32 %add } @@ -236,7 +236,7 @@ ; X32-NEXT: calll _test_argReti32 ; X32-NEXT: incl %eax ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargReti32: ; WIN64: # BB#0: @@ -247,7 +247,7 @@ ; WIN64-NEXT: callq test_argReti32 ; WIN64-NEXT: incl %eax ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -261,7 +261,7 @@ ; LINUXOSX64-NEXT: callq test_argReti32 ; LINUXOSX64-NEXT: incl %eax ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = add i32 %a, 1 %c = call x86_regcallcc i32 @test_argReti32(i32 %b) %d = add i32 %c, 1 @@ -274,19 +274,19 @@ ; X32: # BB#0: ; X32-NEXT: addl $3, %eax ; X32-NEXT: adcl $1, %ecx -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argReti64: ; WIN64: # BB#0: ; WIN64-NEXT: movabsq $4294967299, %rcx # imm = 0x100000003 ; WIN64-NEXT: addq %rcx, %rax -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argReti64: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: movabsq $4294967299, %rcx # imm = 0x100000003 ; LINUXOSX64-NEXT: addq %rcx, %rax -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %add = add i64 %a, 4294967299 ret i64 %add } @@ -302,7 +302,7 @@ ; X32-NEXT: addl $1, %eax ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargReti64: ; WIN64: # BB#0: @@ -313,7 +313,7 @@ ; WIN64-NEXT: callq test_argReti64 ; WIN64-NEXT: incq %rax ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -327,7 +327,7 @@ ; LINUXOSX64-NEXT: callq test_argReti64 ; LINUXOSX64-NEXT: incq %rax ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = add i64 %a, 1 %c = call x86_regcallcc i64 @test_argReti64(i64 %b) %d = add i64 %c, 1 @@ -339,17 +339,17 @@ ; X32-LABEL: test_argRetFloat: ; X32: # BB#0: ; X32-NEXT: vaddss __real@3f800000, %xmm0, %xmm0 -; X32-NEXT: retl +; 
X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argRetFloat: ; WIN64: # BB#0: ; WIN64-NEXT: vaddss __real@{{.*}}(%rip), %xmm0, %xmm0 -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argRetFloat: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %add = fadd float 1.0, %a ret float %add } @@ -368,7 +368,7 @@ ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload ; X32-NEXT: addl $24, %esp ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargRetFloat: ; WIN64: # BB#0: @@ -386,7 +386,7 @@ ; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload ; WIN64-NEXT: addq $16, %rsp ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -407,7 +407,7 @@ ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload ; LINUXOSX64-NEXT: addq $16, %rsp ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = fadd float 1.0, %a %c = call x86_regcallcc float @test_argRetFloat(float %b) %d = fadd float 1.0, %c @@ -419,17 +419,17 @@ ; X32-LABEL: test_argRetDouble: ; X32: # BB#0: ; X32-NEXT: vaddsd __real@3ff0000000000000, %xmm0, %xmm0 -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argRetDouble: ; WIN64: # BB#0: ; WIN64-NEXT: vaddsd __real@{{.*}}(%rip), %xmm0, %xmm0 -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argRetDouble: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %add = fadd double %a, 1.0 ret double %add } @@ -448,7 +448,7 @@ ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload ; X32-NEXT: addl $24, %esp ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargRetDouble: ; WIN64: # BB#0: @@ -466,7 +466,7 @@ ; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload ; WIN64-NEXT: addq $16, %rsp ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -487,7 +487,7 @@ ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload ; LINUXOSX64-NEXT: addq $16, %rsp ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = fadd double 1.0, %a %c = call x86_regcallcc double @test_argRetDouble(double %b) %d = fadd double 1.0, %c @@ -499,17 +499,17 @@ ; X32-LABEL: test_argRetf80: ; X32: # BB#0: ; X32-NEXT: fadd %st(0), %st(0) -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argRetf80: ; WIN64: # BB#0: ; WIN64-NEXT: fadd %st(0), %st(0) -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argRetf80: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: fadd %st(0), %st(0) -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %r0 = fadd x86_fp80 %a0, %a0 ret x86_fp80 %r0 } @@ -523,7 +523,7 @@ ; X32-NEXT: calll _test_argRetf80 ; X32-NEXT: fadd %st(0), %st(0) ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargRetf80: ; WIN64: # BB#0: @@ -534,7 +534,7 @@ ; WIN64-NEXT: callq test_argRetf80 ; WIN64-NEXT: fadd %st(0), %st(0) ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -548,7 +548,7 @@ ; LINUXOSX64-NEXT: callq test_argRetf80 ; LINUXOSX64-NEXT: fadd %st(0), %st(0) ; LINUXOSX64-NEXT: popq %rsp -; 
LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = fadd x86_fp80 %a, %a %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b) %d = fadd x86_fp80 %c, %c @@ -560,17 +560,17 @@ ; X32-LABEL: test_argRetPointer: ; X32: # BB#0: ; X32-NEXT: incl %eax -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argRetPointer: ; WIN64: # BB#0: ; WIN64-NEXT: incl %eax -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argRetPointer: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: incl %eax -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = ptrtoint [4 x i32]* %a to i32 %c = add i32 %b, 1 %d = inttoptr i32 %c to [4 x i32]* @@ -586,7 +586,7 @@ ; X32-NEXT: calll _test_argRetPointer ; X32-NEXT: incl %eax ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargRetPointer: ; WIN64: # BB#0: @@ -597,7 +597,7 @@ ; WIN64-NEXT: callq test_argRetPointer ; WIN64-NEXT: incl %eax ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -611,7 +611,7 @@ ; LINUXOSX64-NEXT: callq test_argRetPointer ; LINUXOSX64-NEXT: incl %eax ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = ptrtoint [4 x i32]* %a to i32 %c = add i32 %b, 1 %d = inttoptr i32 %c to [4 x i32]* @@ -627,17 +627,17 @@ ; X32-LABEL: test_argRet128Vector: ; X32: # BB#0: ; X32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argRet128Vector: ; WIN64: # BB#0: ; WIN64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argRet128Vector: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %d = select <4 x i1> undef , <4 x i32> %a, <4 x i32> %b ret <4 x i32> %d } @@ -656,7 +656,7 @@ ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload ; X32-NEXT: addl $24, %esp ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargRet128Vector: ; WIN64: # BB#0: @@ -674,7 +674,7 @@ ; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload ; WIN64-NEXT: addq $16, %rsp ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -695,7 +695,7 @@ ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload ; LINUXOSX64-NEXT: addq $16, %rsp ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %a) %c = select <4 x i1> undef , <4 x i32> %a, <4 x i32> %b ret <4 x i32> %c @@ -706,17 +706,17 @@ ; X32-LABEL: test_argRet256Vector: ; X32: # BB#0: ; X32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argRet256Vector: ; WIN64: # BB#0: ; WIN64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argRet256Vector: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %d = select <8 x i1> undef , <8 x i32> %a, <8 x i32> %b ret <8 x i32> %d } @@ -734,7 +734,7 @@ ; X32-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} ; X32-NEXT: addl $56, %esp ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargRet256Vector: ; 
WIN64: # BB#0: @@ -750,7 +750,7 @@ ; WIN64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} ; WIN64-NEXT: addq $48, %rsp ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -769,7 +769,7 @@ ; LINUXOSX64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} ; LINUXOSX64-NEXT: addq $48, %rsp ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %a) %c = select <8 x i1> undef , <8 x i32> %a, <8 x i32> %b ret <8 x i32> %c @@ -780,17 +780,17 @@ ; X32-LABEL: test_argRet512Vector: ; X32: # BB#0: ; X32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argRet512Vector: ; WIN64: # BB#0: ; WIN64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argRet512Vector: ; LINUXOSX64: # BB#0: ; LINUXOSX64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %d = select <16 x i1> undef , <16 x i32> %a, <16 x i32> %b ret <16 x i32> %d } @@ -808,7 +808,7 @@ ; X32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; X32-NEXT: addl $120, %esp ; X32-NEXT: popl %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_CallargRet512Vector: ; WIN64: # BB#0: @@ -824,7 +824,7 @@ ; WIN64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; WIN64-NEXT: addq $112, %rsp ; WIN64-NEXT: popq %rsp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc @@ -843,7 +843,7 @@ ; LINUXOSX64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; LINUXOSX64-NEXT: addq $112, %rsp ; LINUXOSX64-NEXT: popq %rsp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> %a) %c = select <16 x i1> undef , <16 x i32> %a, <16 x i32> %b ret <16 x i32> %c @@ -867,7 +867,7 @@ ; X32-NEXT: vmovups (%esp), %xmm6 # 16-byte Reload ; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload ; X32-NEXT: addl $44, %esp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: testf32_inp: ; WIN64: # BB#0: @@ -879,7 +879,7 @@ ; WIN64-NEXT: vsubps %zmm1, %zmm7, %zmm1 ; WIN64-NEXT: vaddps %zmm4, %zmm0, %zmm0 ; WIN64-NEXT: vaddps %zmm5, %zmm1, %zmm1 -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: testf32_inp: ; LINUXOSX64: # BB#0: @@ -891,7 +891,7 @@ ; LINUXOSX64-NEXT: vsubps %zmm1, %zmm7, %zmm1 ; LINUXOSX64-NEXT: vaddps %zmm4, %zmm0, %zmm0 ; LINUXOSX64-NEXT: vaddps %zmm5, %zmm1, %zmm1 -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %x1 = fadd <32 x float> %a, %b %x2 = fmul <32 x float> %a, %b %x3 = fsub <32 x float> %x1, %x2 @@ -907,51 +907,50 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: subl $20, %esp ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: subl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl %edi, %ebp -; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: imull %ebp, %edx -; X32-NEXT: subl %esi, %ebx +; X32-NEXT: movl %eax, 
%ebx +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: subl %ecx, %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %esi, %ecx +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: imull %ebp, %ebx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: subl %edi, %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %ebp +; X32-NEXT: imull %ebp, %ecx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %ebp ; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax ; X32-NEXT: imull %ebp, %eax -; X32-NEXT: addl %eax, %edx +; X32-NEXT: addl %eax, %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl (%esp), %ebp # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: addl {{[0-9]+}}(%esp), %edi -; X32-NEXT: imull %eax, %edi ; X32-NEXT: addl {{[0-9]+}}(%esp), %esi -; X32-NEXT: imull %ebp, %esi -; X32-NEXT: addl %edi, %esi +; X32-NEXT: imull %eax, %esi +; X32-NEXT: addl {{[0-9]+}}(%esp), %edx +; X32-NEXT: imull %ebp, %edx +; X32-NEXT: addl %esi, %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl %edx, %eax +; X32-NEXT: imull %edi, %ecx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: addl $20, %esp ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: testi32_inp: ; WIN64: # BB#0: @@ -990,7 +989,7 @@ ; WIN64-NEXT: popq %rbx ; WIN64-NEXT: popq %rbp ; WIN64-NEXT: popq %r13 -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: testi32_inp: ; LINUXOSX64: # BB#0: @@ -1028,7 +1027,7 @@ ; LINUXOSX64-NEXT: addl %r15d, %eax ; LINUXOSX64-NEXT: popq %rbx ; LINUXOSX64-NEXT: popq %rbp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %x1 = sub i32 %a1, %a2 %x2 = sub i32 %a3, %a4 %x3 = sub i32 %a5, %a6 @@ -1081,7 +1080,7 @@ ; X32-NEXT: vaddps 584(%ebp), %zmm1, %zmm1 ; X32-NEXT: movl %ebp, %esp ; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: testf32_stack: ; WIN64: # BB#0: @@ -1107,7 +1106,7 @@ ; WIN64-NEXT: vaddps 80(%rbp), %zmm1, %zmm1 ; WIN64-NEXT: movq %rbp, %rsp ; WIN64-NEXT: popq %rbp -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: testf32_stack: ; LINUXOSX64: # BB#0: @@ -1133,7 +1132,7 @@ ; LINUXOSX64-NEXT: vaddps 80(%rbp), %zmm1, %zmm1 ; LINUXOSX64-NEXT: movq %rbp, %rsp ; LINUXOSX64-NEXT: popq %rbp -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %x1 = fadd <32 x float> %a0, %b0 %x2 = fadd <32 x float> %c0, %x1 %x3 = fadd <32 x float> %a1, %x2 @@ -1173,7 +1172,7 @@ ; X32-NEXT: vcvttsd2si %xmm0, %eax ; X32-NEXT: movl %ebp, %esp ; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argRetMixTypes: ; WIN64: # BB#0: @@ -1190,7 +1189,7 @@ ; WIN64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1 ; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; WIN64-NEXT: vcvttsd2si %xmm0, %eax -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: 
test_argRetMixTypes: ; LINUXOSX64: # BB#0: @@ -1207,7 +1206,7 @@ ; LINUXOSX64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1 ; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; LINUXOSX64-NEXT: vcvttsd2si %xmm0, %eax -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %8 = fpext float %1 to double %9 = fadd double %8, %0 %10 = sitofp i8 %2 to double @@ -1235,7 +1234,7 @@ ; X32-NEXT: movb $7, %cl ; X32-NEXT: movl $999, %edx # imm = 0x3E7 ; X32-NEXT: xorl %edi, %edi -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; WIN64-LABEL: test_argMultiRet: ; WIN64: # BB#0: @@ -1243,7 +1242,7 @@ ; WIN64-NEXT: movl $4, %eax ; WIN64-NEXT: movb $7, %cl ; WIN64-NEXT: movl $999, %edx # imm = 0x3E7 -; WIN64-NEXT: retq +; WIN64-NEXT: ret{{[l|q]}} ; ; LINUXOSX64-LABEL: test_argMultiRet: ; LINUXOSX64: # BB#0: @@ -1251,7 +1250,7 @@ ; LINUXOSX64-NEXT: movl $4, %eax ; LINUXOSX64-NEXT: movb $7, %cl ; LINUXOSX64-NEXT: movl $999, %edx # imm = 0x3E7 -; LINUXOSX64-NEXT: retq +; LINUXOSX64-NEXT: ret{{[l|q]}} %6 = fadd double %1, 5.000000e+00 %7 = insertvalue %struct.complex undef, float %0, 0 %8 = insertvalue %struct.complex %7, double %6, 1 Index: test/CodeGen/X86/avx512-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -983,18 +983,18 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_mask_broadcast_vaddpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: vpxor %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpcmpneqq %zmm0, %zmm2, %k1 -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; GENERIC-NEXT: vpcmpneqq %zmm1, %zmm2, %k1 +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_broadcast_vaddpd: ; SKX: # BB#0: -; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpcmpneqq %zmm0, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50] -; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; SKX-NEXT: vpcmpneqq %zmm1, %zmm2, %k1 # sched: [3:1.00] +; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %tmp = load double, double* %j @@ -6868,19 +6868,21 @@ ; GENERIC-LABEL: mand16: ; GENERIC: # BB#0: ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: xorl %esi, %eax # sched: [1:0.33] -; GENERIC-NEXT: andl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: orl %eax, %edi # sched: [1:0.33] -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %eax, %ecx # sched: [1:0.33] +; GENERIC-NEXT: xorl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: andl %esi, %eax # sched: [1:0.33] +; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: # kill: %AX %AX %EAX ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mand16: ; SKX: # BB#0: ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: xorl %esi, %eax # sched: [1:0.25] -; SKX-NEXT: andl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: orl %eax, %edi # sched: [1:0.25] -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: movl %eax, %ecx # sched: [1:0.25] +; SKX-NEXT: xorl %esi, %ecx # sched: [1:0.25] +; 
SKX-NEXT: andl %esi, %eax # sched: [1:0.25] +; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25] +; SKX-NEXT: # kill: %AX %AX %EAX ; SKX-NEXT: retq # sched: [7:1.00] %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> Index: test/CodeGen/X86/avx512-select.ll =================================================================== --- test/CodeGen/X86/avx512-select.ll +++ test/CodeGen/X86/avx512-select.ll @@ -134,8 +134,9 @@ ; ; X64-LABEL: select05: ; X64: # BB#0: -; X64-NEXT: orl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> @@ -184,8 +185,9 @@ ; ; X64-LABEL: select06: ; X64: # BB#0: -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> Index: test/CodeGen/X86/avx512bw-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512bw-mask-op.ll +++ test/CodeGen/X86/avx512bw-mask-op.ll @@ -81,10 +81,10 @@ ; CHECK-LABEL: mand32: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andl %esi, %eax -; CHECK-NEXT: xorl %esi, %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: andl %esi, %ecx +; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax ; CHECK-NEXT: retq %ma = bitcast i32 %x to <32 x i1> %mb = bitcast i32 %y to <32 x i1> @@ -118,10 +118,10 @@ ; CHECK-LABEL: mand64: ; CHECK: ## BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: andq %rsi, %rax -; CHECK-NEXT: xorq %rsi, %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: xorq %rsi, %rax +; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: retq %ma = bitcast i64 %x to <64 x i1> %mb = bitcast i64 %y to <64 x i1> Index: test/CodeGen/X86/avx512dq-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512dq-mask-op.ll +++ test/CodeGen/X86/avx512dq-mask-op.ll @@ -34,10 +34,11 @@ ; CHECK-LABEL: mand8: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorl %esi, %ecx +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %ma = bitcast i8 %x to <8 x i1> %mb = bitcast i8 %y to <8 x i1> Index: test/CodeGen/X86/avx512vl-arith.ll =================================================================== --- test/CodeGen/X86/avx512vl-arith.ll +++ test/CodeGen/X86/avx512vl-arith.ll @@ -429,10 +429,10 @@ define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, double* %j, <4 x i64> %mask1) nounwind { ; CHECK-LABEL: test_mask_broadcast_vaddpd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] -; CHECK-NEXT: vpcmpneqq %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf3,0xed,0x28,0x1f,0xc8,0x04] -; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm1, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x0f] -; CHECK-NEXT: vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; CHECK-NEXT: vpxor %xmm1, %xmm1, 
%xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %ymm1, %ymm2, %k1 ## encoding: [0x62,0xf3,0xed,0x28,0x1f,0xc9,0x04] +; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0x58,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %tmp = load double, double* %j @@ -879,10 +879,10 @@ define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i, double* %j, <2 x i64> %mask1) nounwind { ; CHECK-LABEL: test_mask_broadcast_vaddpd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] -; CHECK-NEXT: vpcmpneqq %xmm0, %xmm2, %k1 ## encoding: [0x62,0xf3,0xed,0x08,0x1f,0xc8,0x04] -; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm1, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x0f] -; CHECK-NEXT: vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm2, %k1 ## encoding: [0x62,0xf3,0xed,0x08,0x1f,0xc9,0x04] +; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0x58,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %tmp = load double, double* %j Index: test/CodeGen/X86/bigstructret.ll =================================================================== --- test/CodeGen/X86/bigstructret.ll +++ test/CodeGen/X86/bigstructret.ll @@ -8,20 +8,20 @@ define fastcc %0 @ReturnBigStruct() nounwind readnone { ; X86-LABEL: ReturnBigStruct: ; X86: # BB#0: # %entry -; X86-NEXT: movl $24601, 12(%ecx) # imm = 0x6019 -; X86-NEXT: movl $48, 8(%ecx) -; X86-NEXT: movl $24, 4(%ecx) -; X86-NEXT: movl $12, (%ecx) ; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl $24601, 12(%eax) # imm = 0x6019 +; X86-NEXT: movl $48, 8(%eax) +; X86-NEXT: movl $24, 4(%eax) +; X86-NEXT: movl $12, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: ReturnBigStruct: ; X64: # BB#0: # %entry -; X64-NEXT: movabsq $105660490448944, %rax # imm = 0x601900000030 -; X64-NEXT: movq %rax, 8(%rdi) -; X64-NEXT: movabsq $103079215116, %rax # imm = 0x180000000C -; X64-NEXT: movq %rax, (%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $105660490448944, %rcx # imm = 0x601900000030 +; X64-NEXT: movq %rcx, 8(%rax) +; X64-NEXT: movabsq $103079215116, %rcx # imm = 0x180000000C +; X64-NEXT: movq %rcx, (%rax) ; X64-NEXT: retq entry: %0 = insertvalue %0 zeroinitializer, i32 12, 0 @@ -35,18 +35,18 @@ define fastcc %1 @ReturnBigStruct2() nounwind readnone { ; X86-LABEL: ReturnBigStruct2: ; X86: # BB#0: # %entry -; X86-NEXT: movl $48, 4(%ecx) -; X86-NEXT: movb $1, 2(%ecx) -; X86-NEXT: movw $256, (%ecx) # imm = 0x100 ; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl $48, 4(%eax) +; X86-NEXT: movb $1, 2(%eax) +; X86-NEXT: movw $256, (%eax) # imm = 0x100 ; X86-NEXT: retl ; ; X64-LABEL: ReturnBigStruct2: ; X64: # BB#0: # %entry -; X64-NEXT: movl $48, 4(%rdi) -; X64-NEXT: movb $1, 2(%rdi) -; X64-NEXT: movw $256, (%rdi) # imm = 0x100 ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl $48, 4(%rax) +; X64-NEXT: movb $1, 2(%rax) +; X64-NEXT: movw $256, (%rax) # imm = 0x100 ; X64-NEXT: retq entry: %0 = insertvalue %1 zeroinitializer, i1 false, 0 Index: test/CodeGen/X86/bitcast-i256.ll =================================================================== --- 
test/CodeGen/X86/bitcast-i256.ll +++ test/CodeGen/X86/bitcast-i256.ll @@ -5,16 +5,16 @@ define i256 @foo(<8 x i32> %a) { ; FAST-LABEL: foo: ; FAST: # BB#0: -; FAST-NEXT: vmovups %ymm0, (%rdi) ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: vmovups %ymm0, (%rax) ; FAST-NEXT: vzeroupper ; FAST-NEXT: retq ; ; SLOW-LABEL: foo: ; SLOW: # BB#0: -; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rdi) -; SLOW-NEXT: vmovups %xmm0, (%rdi) ; SLOW-NEXT: movq %rdi, %rax +; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rax) +; SLOW-NEXT: vmovups %xmm0, (%rax) ; SLOW-NEXT: vzeroupper ; SLOW-NEXT: retq %r = bitcast <8 x i32> %a to i256 Index: test/CodeGen/X86/bitcast-int-to-vector-bool.ll =================================================================== --- test/CodeGen/X86/bitcast-int-to-vector-bool.ll +++ test/CodeGen/X86/bitcast-int-to-vector-bool.ll @@ -200,8 +200,8 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) { ; SSE2-SSSE3-LABEL: bitcast_i32_32i1: ; SSE2-SSSE3: # BB#0: -; SSE2-SSSE3-NEXT: movl %esi, (%rdi) ; SSE2-SSSE3-NEXT: movq %rdi, %rax +; SSE2-SSSE3-NEXT: movl %esi, (%rax) ; SSE2-SSSE3-NEXT: retq ; ; AVX1-LABEL: bitcast_i32_32i1: @@ -257,14 +257,14 @@ define <64 x i1> @bitcast_i64_64i1(i64 %a0) { ; SSE2-SSSE3-LABEL: bitcast_i64_64i1: ; SSE2-SSSE3: # BB#0: -; SSE2-SSSE3-NEXT: movq %rsi, (%rdi) ; SSE2-SSSE3-NEXT: movq %rdi, %rax +; SSE2-SSSE3-NEXT: movq %rsi, (%rax) ; SSE2-SSSE3-NEXT: retq ; ; AVX12-LABEL: bitcast_i64_64i1: ; AVX12: # BB#0: -; AVX12-NEXT: movq %rsi, (%rdi) ; AVX12-NEXT: movq %rdi, %rax +; AVX12-NEXT: movq %rsi, (%rax) ; AVX12-NEXT: retq ; ; AVX512-LABEL: bitcast_i64_64i1: Index: test/CodeGen/X86/bitreverse.ll =================================================================== --- test/CodeGen/X86/bitreverse.ll +++ test/CodeGen/X86/bitreverse.ll @@ -341,20 +341,21 @@ ; ; X64-LABEL: test_bitreverse_i8: ; X64: # BB#0: -; X64-NEXT: rolb $4, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $51, %al -; X64-NEXT: shlb $2, %al -; X64-NEXT: andb $-52, %dil -; X64-NEXT: shrb $2, %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $85, %al -; X64-NEXT: addb %al, %al -; X64-NEXT: andb $-86, %dil -; X64-NEXT: shrb %dil -; X64-NEXT: orb %al, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb $4, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $51, %cl +; X64-NEXT: shlb $2, %cl +; X64-NEXT: andb $-52, %al +; X64-NEXT: shrb $2, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $85, %cl +; X64-NEXT: addb %cl, %cl +; X64-NEXT: andb $-86, %al +; X64-NEXT: shrb %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b @@ -384,21 +385,22 @@ ; ; X64-LABEL: test_bitreverse_i4: ; X64: # BB#0: -; X64-NEXT: rolb $4, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $51, %al -; X64-NEXT: shlb $2, %al -; X64-NEXT: andb $-52, %dil -; X64-NEXT: shrb $2, %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $80, %al -; X64-NEXT: addb %al, %al -; X64-NEXT: andb $-96, %dil -; X64-NEXT: shrb %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: shrb $4, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb $4, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $51, %cl +; X64-NEXT: shlb $2, %cl +; X64-NEXT: andb $-52, %al +; X64-NEXT: shrb $2, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $80, %cl +; X64-NEXT: addb %cl, %cl +; X64-NEXT: andb $-96, %al +; X64-NEXT: shrb %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: shrb $4, %al +; X64-NEXT: # kill: %AL %AL 
%EAX ; X64-NEXT: retq %b = call i4 @llvm.bitreverse.i4(i4 %a) ret i4 %b @@ -474,6 +476,7 @@ ; X64-LABEL: identity_i8: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) %c = call i8 @llvm.bitreverse.i8(i8 %b) Index: test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll @@ -10,9 +10,9 @@ define i64 @test__andn_u64(i64 %a0, i64 %a1) { ; X64-LABEL: test__andn_u64: ; X64: # BB#0: -; X64-NEXT: xorq $-1, %rdi -; X64-NEXT: andq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorq $-1, %rax +; X64-NEXT: andq %rsi, %rax ; X64-NEXT: retq %xor = xor i64 %a0, -1 %res = and i64 %xor, %a1 @@ -84,9 +84,9 @@ define i64 @test_andn_u64(i64 %a0, i64 %a1) { ; X64-LABEL: test_andn_u64: ; X64: # BB#0: -; X64-NEXT: xorq $-1, %rdi -; X64-NEXT: andq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorq $-1, %rax +; X64-NEXT: andq %rsi, %rax ; X64-NEXT: retq %xor = xor i64 %a0, -1 %res = and i64 %xor, %a1 Index: test/CodeGen/X86/bmi-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/bmi-intrinsics-fast-isel.ll +++ test/CodeGen/X86/bmi-intrinsics-fast-isel.ll @@ -47,9 +47,9 @@ ; ; X64-LABEL: test__andn_u32: ; X64: # BB#0: -; X64-NEXT: xorl $-1, %edi -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl $-1, %eax +; X64-NEXT: andl %esi, %eax ; X64-NEXT: retq %xor = xor i32 %a0, -1 %res = and i32 %xor, %a1 @@ -199,9 +199,9 @@ ; ; X64-LABEL: test_andn_u32: ; X64: # BB#0: -; X64-NEXT: xorl $-1, %edi -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl $-1, %eax +; X64-NEXT: andl %esi, %eax ; X64-NEXT: retq %xor = xor i32 %a0, -1 %res = and i32 %xor, %a1 Index: test/CodeGen/X86/bmi.ll =================================================================== --- test/CodeGen/X86/bmi.ll +++ test/CodeGen/X86/bmi.ll @@ -420,9 +420,9 @@ define i32 @non_bextr32(i32 %x) { ; CHECK-LABEL: non_bextr32: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: shrl $2, %edi -; CHECK-NEXT: andl $111, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $2, %eax +; CHECK-NEXT: andl $111, %eax ; CHECK-NEXT: retq entry: %shr = lshr i32 %x, 2 @@ -446,8 +446,9 @@ define i32 @bzhi32b(i32 %x, i8 zeroext %index) { ; BMI1-LABEL: bzhi32b: ; BMI1: # BB#0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: %CL %CL %ECX ; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: decl %eax ; BMI1-NEXT: andl %edi, %eax @@ -468,8 +469,9 @@ define i32 @bzhi32b_load(i32* %w, i8 zeroext %index) { ; BMI1-LABEL: bzhi32b_load: ; BMI1: # BB#0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: %CL %CL %ECX ; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: decl %eax ; BMI1-NEXT: andl (%rdi), %eax @@ -491,8 +493,9 @@ define i32 @bzhi32c(i32 %x, i8 zeroext %index) { ; BMI1-LABEL: bzhi32c: ; BMI1: # BB#0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: %CL %CL %ECX ; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: decl %eax ; BMI1-NEXT: andl %edi, %eax @@ -535,12 +538,12 @@ define i32 @bzhi32e(i32 %a, i32 %b) { ; BMI1-LABEL: bzhi32e: ; BMI1: # BB#0: # %entry +; BMI1-NEXT: movl %edi, %eax ; BMI1-NEXT: movl $32, %ecx ; BMI1-NEXT: subl %esi, %ecx -; 
BMI1-NEXT: shll %cl, %edi +; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: # kill: %CL %CL %ECX -; BMI1-NEXT: shrl %cl, %edi -; BMI1-NEXT: movl %edi, %eax +; BMI1-NEXT: shrl %cl, %eax ; BMI1-NEXT: retq ; ; BMI2-LABEL: bzhi32e: @@ -557,8 +560,9 @@ define i64 @bzhi64b(i64 %x, i8 zeroext %index) { ; BMI1-LABEL: bzhi64b: ; BMI1: # BB#0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: %CL %CL %ECX ; BMI1-NEXT: shlq %cl, %rax ; BMI1-NEXT: decq %rax ; BMI1-NEXT: andq %rdi, %rax @@ -626,12 +630,12 @@ define i64 @bzhi64e(i64 %a, i64 %b) { ; BMI1-LABEL: bzhi64e: ; BMI1: # BB#0: # %entry +; BMI1-NEXT: movq %rdi, %rax ; BMI1-NEXT: movl $64, %ecx ; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shlq %cl, %rdi +; BMI1-NEXT: shlq %cl, %rax ; BMI1-NEXT: # kill: %CL %CL %ECX -; BMI1-NEXT: shrq %cl, %rdi -; BMI1-NEXT: movq %rdi, %rax +; BMI1-NEXT: shrq %cl, %rax ; BMI1-NEXT: retq ; ; BMI2-LABEL: bzhi64e: @@ -648,12 +652,12 @@ define i64 @bzhi64f(i64 %a, i32 %b) { ; BMI1-LABEL: bzhi64f: ; BMI1: # BB#0: # %entry +; BMI1-NEXT: movq %rdi, %rax ; BMI1-NEXT: movl $64, %ecx ; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shlq %cl, %rdi +; BMI1-NEXT: shlq %cl, %rax ; BMI1-NEXT: # kill: %CL %CL %ECX -; BMI1-NEXT: shrq %cl, %rdi -; BMI1-NEXT: movq %rdi, %rax +; BMI1-NEXT: shrq %cl, %rax ; BMI1-NEXT: retq ; ; BMI2-LABEL: bzhi64f: @@ -707,8 +711,8 @@ define i64 @bzhi64_small_constant_mask(i64 %x) { ; CHECK-LABEL: bzhi64_small_constant_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-NEXT: retq entry: %and = and i64 %x, 2147483647 Index: test/CodeGen/X86/bool-simplify.ll =================================================================== --- test/CodeGen/X86/bool-simplify.ll +++ test/CodeGen/X86/bool-simplify.ll @@ -4,9 +4,9 @@ define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) { ; CHECK-LABEL: foo: ; CHECK: # BB#0: -; CHECK-NEXT: ptest %xmm0, %xmm0 -; CHECK-NEXT: cmovnel %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ptest %xmm0, %xmm0 +; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: retq %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) %t2 = icmp ne i32 %t1, 0 Index: test/CodeGen/X86/bswap-rotate.ll =================================================================== --- test/CodeGen/X86/bswap-rotate.ll +++ test/CodeGen/X86/bswap-rotate.ll @@ -14,8 +14,9 @@ ; ; X64-LABEL: combine_bswap_rotate: ; X64: # BB#0: -; X64-NEXT: rolw $9, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $9, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %1 = call i16 @llvm.bswap.i16(i16 %a0) %2 = shl i16 %1, 1 Index: test/CodeGen/X86/bswap-wide-int.ll =================================================================== --- test/CodeGen/X86/bswap-wide-int.ll +++ test/CodeGen/X86/bswap-wide-int.ll @@ -25,14 +25,14 @@ ; ; X64-LABEL: bswap_i64: ; X64: # BB#0: -; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: bswapq %rax ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i64: ; X64-MOVBE: # BB#0: -; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rdi, %rax +; X64-MOVBE-NEXT: bswapq %rax ; X64-MOVBE-NEXT: retq %1 = call i64 @llvm.bswap.i64(i64 %a0) ret i64 %1 @@ -79,17 +79,17 @@ ; ; X64-LABEL: bswap_i128: ; X64: # BB#0: -; X64-NEXT: bswapq %rsi -; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rdi, %rdx ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i128: ; 
X64-MOVBE: # BB#0: -; X64-MOVBE-NEXT: bswapq %rsi -; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rsi, %rax +; X64-MOVBE-NEXT: bswapq %rax +; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rdi, %rdx ; X64-MOVBE-NEXT: retq %1 = call i128 @llvm.bswap.i128(i128 %a0) @@ -149,24 +149,24 @@ ; ; X64-LABEL: bswap_i256: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: bswapq %r8 ; X64-NEXT: bswapq %rcx ; X64-NEXT: bswapq %rdx ; X64-NEXT: bswapq %rsi -; X64-NEXT: movq %rsi, 24(%rdi) -; X64-NEXT: movq %rdx, 16(%rdi) -; X64-NEXT: movq %rcx, 8(%rdi) -; X64-NEXT: movq %r8, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, 24(%rax) +; X64-NEXT: movq %rdx, 16(%rax) +; X64-NEXT: movq %rcx, 8(%rax) +; X64-NEXT: movq %r8, (%rax) ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i256: ; X64-MOVBE: # BB#0: -; X64-MOVBE-NEXT: movbeq %rsi, 24(%rdi) -; X64-MOVBE-NEXT: movbeq %rdx, 16(%rdi) -; X64-MOVBE-NEXT: movbeq %rcx, 8(%rdi) -; X64-MOVBE-NEXT: movbeq %r8, (%rdi) ; X64-MOVBE-NEXT: movq %rdi, %rax +; X64-MOVBE-NEXT: movbeq %rsi, 24(%rax) +; X64-MOVBE-NEXT: movbeq %rdx, 16(%rax) +; X64-MOVBE-NEXT: movbeq %rcx, 8(%rax) +; X64-MOVBE-NEXT: movbeq %r8, (%rax) ; X64-MOVBE-NEXT: retq %1 = call i256 @llvm.bswap.i256(i256 %a0) ret i256 %1 Index: test/CodeGen/X86/bswap_tree.ll =================================================================== --- test/CodeGen/X86/bswap_tree.ll +++ test/CodeGen/X86/bswap_tree.ll @@ -20,9 +20,9 @@ ; ; CHECK64-LABEL: test1: ; CHECK64: # BB#0: -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: roll $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: roll $16, %eax ; CHECK64-NEXT: retq %byte0 = and i32 %x, 255 ; 0x000000ff %byte1 = and i32 %x, 65280 ; 0x0000ff00 @@ -53,9 +53,9 @@ ; ; CHECK64-LABEL: test2: ; CHECK64: # BB#0: -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: roll $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: roll $16, %eax ; CHECK64-NEXT: retq %byte1 = shl i32 %x, 8 %byte0 = lshr i32 %x, 8 Index: test/CodeGen/X86/bswap_tree2.ll =================================================================== --- test/CodeGen/X86/bswap_tree2.ll +++ test/CodeGen/X86/bswap_tree2.ll @@ -25,16 +25,16 @@ ; CHECK64-LABEL: test1: ; CHECK64: # BB#0: ; CHECK64-NEXT: movl %edi, %eax -; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: orl $-16777216, %ecx # imm = 0xFF000000 -; CHECK64-NEXT: shll $8, %eax -; CHECK64-NEXT: shrl $8, %ecx -; CHECK64-NEXT: orl %eax, %ecx -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi -; CHECK64-NEXT: orl %ecx, %edi -; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: movl %eax, %ecx +; CHECK64-NEXT: andl $16711680, %ecx # imm = 0xFF0000 +; CHECK64-NEXT: movl %eax, %edx +; CHECK64-NEXT: orl $-16777216, %edx # imm = 0xFF000000 +; CHECK64-NEXT: shll $8, %ecx +; CHECK64-NEXT: shrl $8, %edx +; CHECK64-NEXT: orl %ecx, %edx +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: shrl $16, %eax +; CHECK64-NEXT: orl %edx, %eax ; CHECK64-NEXT: retq %byte0 = and i32 %x, 255 ; 0x000000ff %byte1 = and i32 %x, 65280 ; 0x0000ff00 Index: test/CodeGen/X86/bt.ll =================================================================== --- test/CodeGen/X86/bt.ll +++ test/CodeGen/X86/bt.ll @@ -1112,16 +1112,16 @@ ; ; X64-LABEL: demanded_i32: ; X64: # BB#0: -; X64-NEXT: movl %edx, %eax -; X64-NEXT: shrl $5, %eax -; X64-NEXT: movl (%rdi,%rax,4), %r8d -; X64-NEXT: movl $1, %edi ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shll %cl, %edi -; X64-NEXT: btl %edx, 
%r8d +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: shrl $5, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edi +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: btl %ecx, %edi ; X64-NEXT: jae .LBB30_2 ; X64-NEXT: # BB#1: -; X64-NEXT: orl %edi, (%rsi,%rax,4) +; X64-NEXT: orl %edx, (%rsi,%rax,4) ; X64-NEXT: .LBB30_2: ; X64-NEXT: retq %4 = lshr i32 %2, 5 Index: test/CodeGen/X86/bypass-slow-division-64.ll =================================================================== --- test/CodeGen/X86/bypass-slow-division-64.ll +++ test/CodeGen/X86/bypass-slow-division-64.ll @@ -8,17 +8,17 @@ ; CHECK-LABEL: Test_get_quotient: ; CHECK: # BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: # BB#2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: retq @@ -30,21 +30,20 @@ ; CHECK-LABEL: Test_get_remainder: ; CHECK: # BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB1_1 ; CHECK-NEXT: # BB#2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: divl %esi -; CHECK-NEXT: # kill: %EDX %EDX %RDX -; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: retq %result = srem i64 %a, %b ret i64 %result @@ -54,18 +53,18 @@ ; CHECK-LABEL: Test_get_quotient_and_remainder: ; CHECK: # BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB2_1 ; CHECK-NEXT: # BB#2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB2_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: %EDX %EDX %RDX ; CHECK-NEXT: # kill: %EAX %EAX %RAX Index: test/CodeGen/X86/cmov-into-branch.ll =================================================================== --- test/CodeGen/X86/cmov-into-branch.ll +++ test/CodeGen/X86/cmov-into-branch.ll @@ -5,9 +5,9 @@ define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y) { ; CHECK-LABEL: test1: ; CHECK: # BB#0: -; CHECK-NEXT: ucomisd (%rdi), %xmm0 -; CHECK-NEXT: cmovbel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: ucomisd (%rdi), %xmm0 +; CHECK-NEXT: cmovbel %edx, %eax ; CHECK-NEXT: retq %load = load double, double* %b, align 8 %cmp = fcmp olt double %load, %a @@ -19,9 +19,9 @@ define i32 @test2(double %a, double %b, i32 %x, i32 %y) { ; CHECK-LABEL: test2: ; CHECK: # BB#0: -; CHECK-NEXT: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovbel %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; CHECK-NEXT: cmovbel %esi, %eax ; CHECK-NEXT: retq %cmp = fcmp ogt double %a, %b %cond = select i1 %cmp, i32 %x, i32 %y @@ -48,10 +48,10 @@ define i32 @test5(i32 
%a, i32* nocapture %b, i32 %x, i32 %y) { ; CHECK-LABEL: test5: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl %edi, (%rsi) -; CHECK-NEXT: cmoval %edi, %ecx -; CHECK-NEXT: cmovael %edx, %ecx ; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: cmpl %edi, (%rsi) +; CHECK-NEXT: cmoval %edi, %eax +; CHECK-NEXT: cmovael %edx, %eax ; CHECK-NEXT: retq %load = load i32, i32* %b, align 4 %cmp = icmp ult i32 %load, %a @@ -83,9 +83,9 @@ define i32 @weighted_select1(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select1: ; CHECK: # BB#0: -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: cmovnel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnel %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0 @@ -96,12 +96,12 @@ define i32 @weighted_select2(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select2: ; CHECK: # BB#0: -; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jne .LBB6_2 ; CHECK-NEXT: # BB#1: # %select.false -; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: .LBB6_2: # %select.end -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !1 @@ -115,14 +115,13 @@ define i32 @weighted_select3(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select3: ; CHECK: # BB#0: -; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: je .LBB7_1 ; CHECK-NEXT: # BB#2: # %select.end -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB7_1: # %select.false -; CHECK-NEXT: movl %esi, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !2 @@ -133,9 +132,9 @@ define i32 @unweighted_select(i32 %a, i32 %b) { ; CHECK-LABEL: unweighted_select: ; CHECK: # BB#0: -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: cmovnel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnel %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !3 Index: test/CodeGen/X86/cmov.ll =================================================================== --- test/CodeGen/X86/cmov.ll +++ test/CodeGen/X86/cmov.ll @@ -194,12 +194,13 @@ define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind { ; CHECK-LABEL: test7: ; CHECK: # BB#0: +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: jne .LBB6_2 ; CHECK-NEXT: # BB#1: -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %d = select i1 %c, i8 %a, i8 %b ret i8 %d Index: test/CodeGen/X86/cmovcmov.ll =================================================================== --- test/CodeGen/X86/cmovcmov.ll +++ test/CodeGen/X86/cmovcmov.ll @@ -9,10 +9,10 @@ ; CHECK-LABEL: test_select_fcmp_oeq_i32: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovnel %esi, %edi -; CMOV-NEXT: cmovpl %esi, %edi ; CMOV-NEXT: movl %edi, %eax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovnel %esi, %eax +; CMOV-NEXT: cmovpl %esi, %eax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) @@ -36,10 +36,10 @@ ; CHECK-LABEL: test_select_fcmp_oeq_i64: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovneq %rsi, %rdi -; CMOV-NEXT: cmovpq %rsi, %rdi ; CMOV-NEXT: movq %rdi, %rax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovneq %rsi, %rax +; 
CMOV-NEXT: cmovpq %rsi, %rax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) @@ -64,10 +64,10 @@ ; CHECK-LABEL: test_select_fcmp_une_i64: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovneq %rdi, %rsi -; CMOV-NEXT: cmovpq %rdi, %rsi ; CMOV-NEXT: movq %rsi, %rax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovneq %rdi, %rax +; CMOV-NEXT: cmovpq %rdi, %rax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) Index: test/CodeGen/X86/cmp.ll =================================================================== --- test/CodeGen/X86/cmp.ll +++ test/CodeGen/X86/cmp.ll @@ -268,9 +268,9 @@ define i32 @test13(i32 %mask, i32 %base, i32 %intra) { ; CHECK-LABEL: test13: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08] -; CHECK-NEXT: cmovnel %edx, %esi # encoding: [0x0f,0x45,0xf2] ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; CHECK-NEXT: testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08] +; CHECK-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2] ; CHECK-NEXT: retq # encoding: [0xc3] entry: %and = and i32 %mask, 8 @@ -283,9 +283,9 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) { ; CHECK-LABEL: test14: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07] -; CHECK-NEXT: cmovnsl %edx, %esi # encoding: [0x0f,0x49,0xf2] ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07] +; CHECK-NEXT: cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2] ; CHECK-NEXT: retq # encoding: [0xc3] entry: %s = lshr i32 %mask, 7 Index: test/CodeGen/X86/combine-add.ll =================================================================== --- test/CodeGen/X86/combine-add.ll +++ test/CodeGen/X86/combine-add.ll @@ -103,8 +103,8 @@ define <4 x i32> @combine_vec_add_sub_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add0: ; SSE: # BB#0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add0: @@ -121,8 +121,8 @@ define <4 x i32> @combine_vec_add_sub_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add1: ; SSE: # BB#0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add1: @@ -139,8 +139,8 @@ define <4 x i32> @combine_vec_add_sub_add2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add2: ; SSE: # BB#0: -; SSE-NEXT: paddd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: paddd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add2: @@ -157,8 +157,8 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add3: ; SSE: # BB#0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add3: @@ -203,9 +203,9 @@ ; ; AVX-LABEL: combine_vec_add_uniquebits: ; AVX: # BB#0: -; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 +; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [61680,61680,61680,61680] ; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 +; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [3855,3855,3855,3855] ; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq Index: test/CodeGen/X86/conditional-indecrement.ll =================================================================== --- 
test/CodeGen/X86/conditional-indecrement.ll +++ test/CodeGen/X86/conditional-indecrement.ll @@ -4,9 +4,9 @@ define i32 @test1(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test1: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -17,9 +17,9 @@ define i32 @test1_commute(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test1_commute: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -30,9 +30,9 @@ define i32 @test2(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test2: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -43,9 +43,9 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test3: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -56,9 +56,9 @@ define i32 @test4(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test4: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -69,9 +69,9 @@ define i32 @test5(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test5: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -82,9 +82,9 @@ define i32 @test6(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test6: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -95,9 +95,9 @@ define i32 @test7(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test7: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -108,9 +108,9 @@ define i32 @test8(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test8: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 Index: test/CodeGen/X86/divide-by-constant.ll =================================================================== --- test/CodeGen/X86/divide-by-constant.ll +++ test/CodeGen/X86/divide-by-constant.ll @@ -96,8 +96,8 @@ ; X32: # BB#0: ; X32-NEXT: movl $365384439, %eax # imm = 0x15C752F7 ; X32-NEXT: mull {{[0-9]+}}(%esp) -; X32-NEXT: shrl $27, %edx ; X32-NEXT: movl %edx, %eax +; X32-NEXT: shrl $27, %eax ; X32-NEXT: retl ; ; X64-LABEL: test5: @@ -222,9 
+222,9 @@ ; ; X64-LABEL: testsize1: ; X64: # BB#0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $32 ; X64-NEXT: popq %rcx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %ecx ; X64-NEXT: retq @@ -245,9 +245,9 @@ ; ; X64-LABEL: testsize2: ; X64: # BB#0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $33 ; X64-NEXT: popq %rcx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %ecx ; X64-NEXT: retq @@ -265,8 +265,8 @@ ; ; X64-LABEL: testsize3: ; X64: # BB#0: # %entry -; X64-NEXT: shrl $5, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl $5, %eax ; X64-NEXT: retq entry: %div = udiv i32 %x, 32 @@ -285,10 +285,10 @@ ; ; X64-LABEL: testsize4: ; X64: # BB#0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $33 ; X64-NEXT: popq %rcx ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: divl %ecx ; X64-NEXT: retq entry: @@ -316,19 +316,18 @@ ; ; X64-LABEL: PR23590: ; X64: # BB#0: # %entry -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: movabsq $6120523590596543007, %rdx # imm = 0x54F077C718E7C21F -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdx +; X64-NEXT: movabsq $6120523590596543007, %rcx # imm = 0x54F077C718E7C21F +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx ; X64-NEXT: shrq $12, %rdx ; X64-NEXT: imulq $12345, %rdx, %rax # imm = 0x3039 -; X64-NEXT: subq %rax, %rcx -; X64-NEXT: movabsq $2635249153387078803, %rdx # imm = 0x2492492492492493 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdx -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: shrq %rcx -; X64-NEXT: leaq (%rcx,%rdx), %rax +; X64-NEXT: subq %rax, %rdi +; X64-NEXT: movabsq $2635249153387078803, %rcx # imm = 0x2492492492492493 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: subq %rdx, %rdi +; X64-NEXT: shrq %rdi +; X64-NEXT: leaq (%rdi,%rdx), %rax ; X64-NEXT: shrq $2, %rax ; X64-NEXT: retq entry: Index: test/CodeGen/X86/divrem.ll =================================================================== --- test/CodeGen/X86/divrem.ll +++ test/CodeGen/X86/divrem.ll @@ -101,6 +101,7 @@ ; X64: # BB#0: ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: cwtd ; X64-NEXT: idivw %si ; X64-NEXT: movw %ax, (%r8) @@ -131,6 +132,7 @@ ; X64-LABEL: si8: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %esi # NOREX @@ -182,8 +184,8 @@ ; X64-LABEL: ui64: ; X64: # BB#0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divq %rsi ; X64-NEXT: movq %rax, (%r8) ; X64-NEXT: movq %rdx, (%rcx) @@ -212,8 +214,8 @@ ; X64-LABEL: ui32: ; X64: # BB#0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divl %esi ; X64-NEXT: movl %eax, (%r8) ; X64-NEXT: movl %edx, (%rcx) @@ -242,8 +244,9 @@ ; X64-LABEL: ui16: ; X64: # BB#0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: divw %si ; X64-NEXT: movw %ax, (%r8) ; X64-NEXT: movw %dx, (%rcx) Index: test/CodeGen/X86/divrem8_ext.ll =================================================================== --- test/CodeGen/X86/divrem8_ext.ll +++ test/CodeGen/X86/divrem8_ext.ll @@ -112,6 +112,7 @@ ; X64-LABEL: test_sdivrem_sext_ah: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: 
movsbl %ah, %ecx # NOREX @@ -137,6 +138,7 @@ ; X64-LABEL: test_srem_sext_ah: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax # NOREX @@ -161,6 +163,7 @@ ; X64-LABEL: test_srem_noext_ah: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax # NOREX @@ -186,6 +189,7 @@ ; X64-LABEL: test_srem_sext64_ah: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax # NOREX Index: test/CodeGen/X86/fast-isel-fold-mem.ll =================================================================== --- test/CodeGen/X86/fast-isel-fold-mem.ll +++ test/CodeGen/X86/fast-isel-fold-mem.ll @@ -3,8 +3,8 @@ define i64 @fold_load(i64* %a, i64 %b) { ; CHECK-LABEL: fold_load -; CHECK: addq (%rdi), %rsi -; CHECK-NEXT: movq %rsi, %rax +; CHECK: movq %rsi, %rax +; CHECK-NEXT: addq (%rdi), %rax %1 = load i64, i64* %a, align 8 %2 = add i64 %1, %b ret i64 %2 Index: test/CodeGen/X86/fast-isel-select-cmov.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-cmov.ll +++ test/CodeGen/X86/fast-isel-select-cmov.ll @@ -31,9 +31,9 @@ define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) { ; CHECK-LABEL: select_cmov_i32: ; CHECK: ## BB#0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: cmovel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmovel %edx, %eax ; CHECK-NEXT: retq %1 = select i1 %cond, i32 %a, i32 %b ret i32 %1 @@ -42,9 +42,9 @@ define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) { ; CHECK-LABEL: select_cmp_cmov_i32: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: cmovbl %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: cmovbl %edi, %eax ; CHECK-NEXT: retq %1 = icmp ult i32 %a, %b %2 = select i1 %1, i32 %a, i32 %b @@ -54,9 +54,9 @@ define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) { ; CHECK-LABEL: select_cmov_i64: ; CHECK: ## BB#0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: cmoveq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmoveq %rdx, %rax ; CHECK-NEXT: retq %1 = select i1 %cond, i64 %a, i64 %b ret i64 %1 @@ -65,9 +65,9 @@ define i64 @select_cmp_cmov_i64(i64 %a, i64 %b) { ; CHECK-LABEL: select_cmp_cmov_i64: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbq %rdi, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: cmovbq %rdi, %rax ; CHECK-NEXT: retq %1 = icmp ult i64 %a, %b %2 = select i1 %1, i64 %a, i64 %b Index: test/CodeGen/X86/fast-isel-select-cmov2.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-cmov2.ll +++ test/CodeGen/X86/fast-isel-select-cmov2.ll @@ -19,30 +19,30 @@ define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) { ; SDAG-LABEL: select_fcmp_oeq_cmov: ; SDAG: ## BB#0: -; SDAG-NEXT: ucomisd %xmm1, %xmm0 -; SDAG-NEXT: cmovneq %rsi, %rdi -; SDAG-NEXT: cmovpq %rsi, %rdi ; SDAG-NEXT: movq %rdi, %rax +; SDAG-NEXT: ucomisd %xmm1, %xmm0 +; SDAG-NEXT: cmovneq %rsi, %rax +; SDAG-NEXT: cmovpq %rsi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: select_fcmp_oeq_cmov: ; FAST: ## BB#0: -; FAST-NEXT: ucomisd %xmm1, %xmm0 -; FAST-NEXT: setnp %al -; FAST-NEXT: sete %cl -; FAST-NEXT: testb %al, %cl -; FAST-NEXT: cmoveq %rsi, %rdi ; 
FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: ucomisd %xmm1, %xmm0 +; FAST-NEXT: setnp %cl +; FAST-NEXT: sete %dl +; FAST-NEXT: testb %cl, %dl +; FAST-NEXT: cmoveq %rsi, %rax ; FAST-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_oeq_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: setnp %al -; FAST_AVX-NEXT: sete %cl -; FAST_AVX-NEXT: testb %al, %cl -; FAST_AVX-NEXT: cmoveq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: setnp %cl +; FAST_AVX-NEXT: sete %dl +; FAST_AVX-NEXT: testb %cl, %dl +; FAST_AVX-NEXT: cmoveq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp oeq double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -52,16 +52,16 @@ define i64 @select_fcmp_ogt_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ogt_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovbeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovbeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ogt_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovbeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovbeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ogt double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -71,16 +71,16 @@ define i64 @select_fcmp_oge_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_oge_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovbq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovbq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_oge_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovbq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovbq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp oge double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -90,16 +90,16 @@ define i64 @select_fcmp_olt_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_olt_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovbeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovbeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_olt_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovbeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovbeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp olt double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -109,16 +109,16 @@ define i64 @select_fcmp_ole_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ole_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovbq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovbq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ole_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovbq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovbq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ole double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -128,16 +128,16 @@ define i64 @select_fcmp_one_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_one_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm1, 
%xmm0 -; NOAVX-NEXT: cmoveq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmoveq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_one_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmoveq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmoveq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp one double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -147,16 +147,16 @@ define i64 @select_fcmp_ord_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ord_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovpq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovpq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ord_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovpq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovpq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ord double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -166,16 +166,16 @@ define i64 @select_fcmp_uno_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_uno_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovnpq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovnpq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_uno_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovnpq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovnpq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp uno double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -185,16 +185,16 @@ define i64 @select_fcmp_ueq_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ueq_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovneq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovneq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ueq_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovneq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovneq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ueq double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -204,16 +204,16 @@ define i64 @select_fcmp_ugt_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ugt_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovaeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovaeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ugt_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovaeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovaeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ugt double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -223,16 +223,16 @@ define i64 @select_fcmp_uge_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_uge_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovaq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovaq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: 
select_fcmp_uge_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovaq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovaq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp uge double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -242,16 +242,16 @@ define i64 @select_fcmp_ult_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ult_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovaeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovaeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ult_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovaeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovaeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ult double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -261,16 +261,16 @@ define i64 @select_fcmp_ule_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ule_cmov: ; NOAVX: ## BB#0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovaq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovaq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ule_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovaq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovaq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ule double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -280,30 +280,30 @@ define i64 @select_fcmp_une_cmov(double %a, double %b, i64 %c, i64 %d) { ; SDAG-LABEL: select_fcmp_une_cmov: ; SDAG: ## BB#0: -; SDAG-NEXT: ucomisd %xmm1, %xmm0 -; SDAG-NEXT: cmovneq %rdi, %rsi -; SDAG-NEXT: cmovpq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: ucomisd %xmm1, %xmm0 +; SDAG-NEXT: cmovneq %rdi, %rax +; SDAG-NEXT: cmovpq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: select_fcmp_une_cmov: ; FAST: ## BB#0: -; FAST-NEXT: ucomisd %xmm1, %xmm0 -; FAST-NEXT: setp %al -; FAST-NEXT: setne %cl -; FAST-NEXT: orb %al, %cl -; FAST-NEXT: cmoveq %rsi, %rdi ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: ucomisd %xmm1, %xmm0 +; FAST-NEXT: setp %cl +; FAST-NEXT: setne %dl +; FAST-NEXT: orb %cl, %dl +; FAST-NEXT: cmoveq %rsi, %rax ; FAST-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_une_cmov: ; FAST_AVX: ## BB#0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: setp %al -; FAST_AVX-NEXT: setne %cl -; FAST_AVX-NEXT: orb %al, %cl -; FAST_AVX-NEXT: cmoveq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: setp %cl +; FAST_AVX-NEXT: setne %dl +; FAST_AVX-NEXT: orb %cl, %dl +; FAST_AVX-NEXT: cmoveq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp une double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -323,9 +323,9 @@ define i64 @select_icmp_eq_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_eq_cmov: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovneq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovneq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp eq i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -335,9 +335,9 @@ define i64 @select_icmp_ne_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ne_cmov: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmoveq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; 
CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ne i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -347,9 +347,9 @@ define i64 @select_icmp_ugt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ugt_cmov: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbeq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovbeq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ugt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -360,9 +360,9 @@ define i64 @select_icmp_uge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_uge_cmov: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovbq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp uge i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -372,9 +372,9 @@ define i64 @select_icmp_ult_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ult_cmov: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovaeq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovaeq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ult i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -384,9 +384,9 @@ define i64 @select_icmp_ule_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ule_cmov: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovaq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovaq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ule i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -396,9 +396,9 @@ define i64 @select_icmp_sgt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sgt_cmov: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovleq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovleq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp sgt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -408,9 +408,9 @@ define i64 @select_icmp_sge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sge_cmov: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovlq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp sge i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -420,9 +420,9 @@ define i64 @select_icmp_slt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_slt_cmov: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovgeq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovgeq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp slt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -432,9 +432,9 @@ define i64 @select_icmp_sle_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sle_cmov: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovgq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp sle i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d Index: test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll +++ test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll @@ -280,12 +280,13 @@ define i8 @select_icmp_sle_i8(i64 %a, i64 %b, i8 %c, i8 %d) { ; CHECK-LABEL: select_icmp_sle_i8: ; CHECK: ## BB#0: +; CHECK-NEXT: 
movl %edx, %eax ; CHECK-NEXT: cmpq %rsi, %rdi ; CHECK-NEXT: jle LBB12_2 ; CHECK-NEXT: ## BB#1: -; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: LBB12_2: -; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %1 = icmp sle i64 %a, %b %2 = select i1 %1, i8 %c, i8 %d Index: test/CodeGen/X86/fast-isel-sext-zext.ll =================================================================== --- test/CodeGen/X86/fast-isel-sext-zext.ll +++ test/CodeGen/X86/fast-isel-sext-zext.ll @@ -9,15 +9,14 @@ ; X32-NEXT: andb $1, %al ; X32-NEXT: negb %al ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test1: ; X64: ## BB#0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: negb %al +; X64-NEXT: ## kill: %AL %AL %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i8 %x to i1 %u = sext i1 %z to i8 ret i8 %u @@ -32,7 +31,6 @@ ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: ## kill: %AX %AX %EAX ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test2: ; X64: ## BB#0: @@ -41,7 +39,6 @@ ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: ## kill: %AX %AX %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i16 %x to i1 %u = sext i1 %z to i16 ret i16 %u @@ -55,7 +52,6 @@ ; X32-NEXT: negb %al ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test3: ; X64: ## BB#0: @@ -63,7 +59,6 @@ ; X64-NEXT: negb %dil ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = sext i1 %z to i32 ret i32 %u @@ -77,7 +72,6 @@ ; X32-NEXT: negb %al ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test4: ; X64: ## BB#0: @@ -85,7 +79,6 @@ ; X64-NEXT: negb %dil ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = sext i1 %z to i32 ret i32 %u @@ -97,14 +90,13 @@ ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test5: ; X64: ## BB#0: -; X64-NEXT: andb $1, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: ## kill: %AL %AL %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i8 %x to i1 %u = zext i1 %z to i8 ret i8 %u @@ -118,7 +110,6 @@ ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: ## kill: %AX %AX %EAX ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test6: ; X64: ## BB#0: @@ -126,7 +117,6 @@ ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: ## kill: %AX %AX %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i16 %x to i1 %u = zext i1 %z to i16 ret i16 %u @@ -139,14 +129,12 @@ ; X32-NEXT: andb $1, %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test7: ; X64: ## BB#0: ; X64-NEXT: andb $1, %dil ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = zext i1 %z to i32 ret i32 %u @@ -159,14 +147,12 @@ ; X32-NEXT: andb $1, %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test8: ; X64: ## BB#0: ; X64-NEXT: andb $1, %dil ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = zext i1 %z to i32 ret i32 %u @@ -178,14 +164,12 @@ ; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: ## kill: %AX %AX %EAX ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test9: ; X64: ## BB#0: ; 
X64-NEXT: movsbl %dil, %eax ; X64-NEXT: ## kill: %AX %AX %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i8 %x to i16 ret i16 %u } @@ -195,13 +179,11 @@ ; X32: ## BB#0: ; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test10: ; X64: ## BB#0: ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i8 %x to i32 ret i32 %u } @@ -213,13 +195,11 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: sarl $31, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test11: ; X64: ## BB#0: ; X64-NEXT: movsbq %dil, %rax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i8 %x to i64 ret i64 %u } @@ -230,14 +210,12 @@ ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: ## kill: %AX %AX %EAX ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test12: ; X64: ## BB#0: ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: ## kill: %AX %AX %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i8 %x to i16 ret i16 %u } @@ -247,13 +225,11 @@ ; X32: ## BB#0: ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test13: ; X64: ## BB#0: ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i8 %x to i32 ret i32 %u } @@ -264,13 +240,11 @@ ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test14: ; X64: ## BB#0: ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i8 %x to i64 ret i64 %u } @@ -280,13 +254,11 @@ ; X32: ## BB#0: ; X32-NEXT: movswl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test15: ; X64: ## BB#0: ; X64-NEXT: movswl %di, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i16 %x to i32 ret i32 %u } @@ -298,13 +270,11 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: sarl $31, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test16: ; X64: ## BB#0: ; X64-NEXT: movswq %di, %rax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i16 %x to i64 ret i64 %u } @@ -314,13 +284,11 @@ ; X32: ## BB#0: ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test17: ; X64: ## BB#0: ; X64-NEXT: movzwl %di, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i16 %x to i32 ret i32 %u } @@ -331,13 +299,11 @@ ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test18: ; X64: ## BB#0: ; X64-NEXT: movzwl %di, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i16 %x to i64 ret i64 %u } @@ -349,13 +315,11 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: sarl $31, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test19: ; X64: ## BB#0: ; X64-NEXT: movslq %edi, %rax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i32 %x to i64 ret i64 %u } @@ -366,13 +330,11 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test20: ; X64: ## BB#0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i32 %x to i64 ret i64 %u } Index: test/CodeGen/X86/fast-isel-shift.ll =================================================================== --- test/CodeGen/X86/fast-isel-shift.ll +++ test/CodeGen/X86/fast-isel-shift.ll @@ -5,8 +5,10 @@ ; 
CHECK-LABEL: shl_i8: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shlb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: %CL %CL %ECX +; CHECK-NEXT: shlb %cl, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %c = shl i8 %a, %b ret i8 %c @@ -16,9 +18,11 @@ ; CHECK-LABEL: shl_i16: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: %CL %CX -; CHECK-NEXT: shlw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: %CX %CX %ECX +; CHECK-NEXT: ## kill: %CL %CX +; CHECK-NEXT: shlw %cl, %ax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq %c = shl i16 %a, %b ret i16 %c @@ -28,9 +32,9 @@ ; CHECK-LABEL: shl_i32: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: %CL %ECX -; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: %CL %ECX +; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: retq %c = shl i32 %a, %b ret i32 %c @@ -40,9 +44,9 @@ ; CHECK-LABEL: shl_i64: ; CHECK: ## BB#0: ; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: ## kill: %CL %RCX -; CHECK-NEXT: shlq %cl, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## kill: %CL %RCX +; CHECK-NEXT: shlq %cl, %rax ; CHECK-NEXT: retq %c = shl i64 %a, %b ret i64 %c @@ -52,8 +56,10 @@ ; CHECK-LABEL: lshr_i8: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shrb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: %CL %CL %ECX +; CHECK-NEXT: shrb %cl, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %c = lshr i8 %a, %b ret i8 %c @@ -63,9 +69,11 @@ ; CHECK-LABEL: lshr_i16: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: %CL %CX -; CHECK-NEXT: shrw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: %CX %CX %ECX +; CHECK-NEXT: ## kill: %CL %CX +; CHECK-NEXT: shrw %cl, %ax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq %c = lshr i16 %a, %b ret i16 %c @@ -75,9 +83,9 @@ ; CHECK-LABEL: lshr_i32: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: %CL %ECX -; CHECK-NEXT: shrl %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: %CL %ECX +; CHECK-NEXT: shrl %cl, %eax ; CHECK-NEXT: retq %c = lshr i32 %a, %b ret i32 %c @@ -87,9 +95,9 @@ ; CHECK-LABEL: lshr_i64: ; CHECK: ## BB#0: ; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: ## kill: %CL %RCX -; CHECK-NEXT: shrq %cl, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## kill: %CL %RCX +; CHECK-NEXT: shrq %cl, %rax ; CHECK-NEXT: retq %c = lshr i64 %a, %b ret i64 %c @@ -99,8 +107,10 @@ ; CHECK-LABEL: ashr_i8: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: sarb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: %CL %CL %ECX +; CHECK-NEXT: sarb %cl, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %c = ashr i8 %a, %b ret i8 %c @@ -110,9 +120,11 @@ ; CHECK-LABEL: ashr_i16: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: %CL %CX -; CHECK-NEXT: sarw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: %CX %CX %ECX +; CHECK-NEXT: ## kill: %CL %CX +; CHECK-NEXT: sarw %cl, %ax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq %c = ashr i16 %a, %b ret i16 %c @@ -122,9 +134,9 @@ ; CHECK-LABEL: ashr_i32: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: %CL %ECX -; CHECK-NEXT: sarl %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: %CL %ECX +; CHECK-NEXT: sarl %cl, %eax ; CHECK-NEXT: retq %c = ashr i32 %a, %b ret i32 %c @@ -134,9 +146,9 @@ ; CHECK-LABEL: ashr_i64: ; CHECK: ## BB#0: ; 
CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: ## kill: %CL %RCX -; CHECK-NEXT: sarq %cl, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## kill: %CL %RCX +; CHECK-NEXT: sarq %cl, %rax ; CHECK-NEXT: retq %c = ashr i64 %a, %b ret i64 %c @@ -145,8 +157,9 @@ define i8 @shl_imm1_i8(i8 %a) { ; CHECK-LABEL: shl_imm1_i8: ; CHECK: ## BB#0: -; CHECK-NEXT: shlb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $1, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %c = shl i8 %a, 1 ret i8 %c @@ -185,8 +198,9 @@ define i8 @lshr_imm1_i8(i8 %a) { ; CHECK-LABEL: lshr_imm1_i8: ; CHECK: ## BB#0: -; CHECK-NEXT: shrb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrb $1, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %c = lshr i8 %a, 1 ret i8 %c @@ -195,8 +209,9 @@ define i16 @lshr_imm1_i16(i16 %a) { ; CHECK-LABEL: lshr_imm1_i16: ; CHECK: ## BB#0: -; CHECK-NEXT: shrw $1, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrw $1, %ax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq %c = lshr i16 %a, 1 ret i16 %c @@ -205,8 +220,8 @@ define i32 @lshr_imm1_i32(i32 %a) { ; CHECK-LABEL: lshr_imm1_i32: ; CHECK: ## BB#0: -; CHECK-NEXT: shrl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $1, %eax ; CHECK-NEXT: retq %c = lshr i32 %a, 1 ret i32 %c @@ -215,8 +230,8 @@ define i64 @lshr_imm1_i64(i64 %a) { ; CHECK-LABEL: lshr_imm1_i64: ; CHECK: ## BB#0: -; CHECK-NEXT: shrq $1, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq $1, %rax ; CHECK-NEXT: retq %c = lshr i64 %a, 1 ret i64 %c @@ -225,8 +240,9 @@ define i8 @ashr_imm1_i8(i8 %a) { ; CHECK-LABEL: ashr_imm1_i8: ; CHECK: ## BB#0: -; CHECK-NEXT: sarb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarb $1, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %c = ashr i8 %a, 1 ret i8 %c @@ -235,8 +251,9 @@ define i16 @ashr_imm1_i16(i16 %a) { ; CHECK-LABEL: ashr_imm1_i16: ; CHECK: ## BB#0: -; CHECK-NEXT: sarw $1, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarw $1, %ax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq %c = ashr i16 %a, 1 ret i16 %c @@ -245,8 +262,8 @@ define i32 @ashr_imm1_i32(i32 %a) { ; CHECK-LABEL: ashr_imm1_i32: ; CHECK: ## BB#0: -; CHECK-NEXT: sarl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarl $1, %eax ; CHECK-NEXT: retq %c = ashr i32 %a, 1 ret i32 %c @@ -255,8 +272,8 @@ define i64 @ashr_imm1_i64(i64 %a) { ; CHECK-LABEL: ashr_imm1_i64: ; CHECK: ## BB#0: -; CHECK-NEXT: sarq $1, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: sarq $1, %rax ; CHECK-NEXT: retq %c = ashr i64 %a, 1 ret i64 %c @@ -265,8 +282,9 @@ define i8 @shl_imm4_i8(i8 %a) { ; CHECK-LABEL: shl_imm4_i8: ; CHECK: ## BB#0: -; CHECK-NEXT: shlb $4, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $4, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %c = shl i8 %a, 4 ret i8 %c @@ -275,8 +293,9 @@ define i16 @shl_imm4_i16(i16 %a) { ; CHECK-LABEL: shl_imm4_i16: ; CHECK: ## BB#0: -; CHECK-NEXT: shlw $4, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlw $4, %ax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq %c = shl i16 %a, 4 ret i16 %c @@ -285,8 +304,8 @@ define i32 @shl_imm4_i32(i32 %a) { ; CHECK-LABEL: shl_imm4_i32: ; CHECK: ## BB#0: -; CHECK-NEXT: shll $4, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll $4, %eax ; CHECK-NEXT: retq %c = shl i32 %a, 4 ret i32 %c @@ -295,8 +314,8 @@ define i64 @shl_imm4_i64(i64 %a) { ; CHECK-LABEL: shl_imm4_i64: ; CHECK: ## BB#0: -; CHECK-NEXT: shlq $4, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shlq $4, %rax ; 
CHECK-NEXT: retq %c = shl i64 %a, 4 ret i64 %c @@ -305,8 +324,9 @@ define i8 @lshr_imm4_i8(i8 %a) { ; CHECK-LABEL: lshr_imm4_i8: ; CHECK: ## BB#0: -; CHECK-NEXT: shrb $4, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrb $4, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %c = lshr i8 %a, 4 ret i8 %c @@ -315,8 +335,9 @@ define i16 @lshr_imm4_i16(i16 %a) { ; CHECK-LABEL: lshr_imm4_i16: ; CHECK: ## BB#0: -; CHECK-NEXT: shrw $4, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrw $4, %ax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq %c = lshr i16 %a, 4 ret i16 %c @@ -325,8 +346,8 @@ define i32 @lshr_imm4_i32(i32 %a) { ; CHECK-LABEL: lshr_imm4_i32: ; CHECK: ## BB#0: -; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $4, %eax ; CHECK-NEXT: retq %c = lshr i32 %a, 4 ret i32 %c @@ -335,8 +356,8 @@ define i64 @lshr_imm4_i64(i64 %a) { ; CHECK-LABEL: lshr_imm4_i64: ; CHECK: ## BB#0: -; CHECK-NEXT: shrq $4, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq $4, %rax ; CHECK-NEXT: retq %c = lshr i64 %a, 4 ret i64 %c @@ -345,8 +366,9 @@ define i8 @ashr_imm4_i8(i8 %a) { ; CHECK-LABEL: ashr_imm4_i8: ; CHECK: ## BB#0: -; CHECK-NEXT: sarb $4, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarb $4, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %c = ashr i8 %a, 4 ret i8 %c @@ -355,8 +377,9 @@ define i16 @ashr_imm4_i16(i16 %a) { ; CHECK-LABEL: ashr_imm4_i16: ; CHECK: ## BB#0: -; CHECK-NEXT: sarw $4, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarw $4, %ax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq %c = ashr i16 %a, 4 ret i16 %c @@ -365,8 +388,8 @@ define i32 @ashr_imm4_i32(i32 %a) { ; CHECK-LABEL: ashr_imm4_i32: ; CHECK: ## BB#0: -; CHECK-NEXT: sarl $4, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarl $4, %eax ; CHECK-NEXT: retq %c = ashr i32 %a, 4 ret i32 %c @@ -375,8 +398,8 @@ define i64 @ashr_imm4_i64(i64 %a) { ; CHECK-LABEL: ashr_imm4_i64: ; CHECK: ## BB#0: -; CHECK-NEXT: sarq $4, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: sarq $4, %rax ; CHECK-NEXT: retq %c = ashr i64 %a, 4 ret i64 %c Index: test/CodeGen/X86/fast-isel-store.ll =================================================================== --- test/CodeGen/X86/fast-isel-store.ll +++ test/CodeGen/X86/fast-isel-store.ll @@ -11,8 +11,8 @@ define i32 @test_store_32(i32* nocapture %addr, i32 %value) { ; ALL32-LABEL: test_store_32: ; ALL32: # BB#0: # %entry -; ALL32-NEXT: movl %esi, (%rdi) ; ALL32-NEXT: movl %esi, %eax +; ALL32-NEXT: movl %eax, (%rdi) ; ALL32-NEXT: retq ; ; ALL64-LABEL: test_store_32: @@ -29,8 +29,9 @@ define i16 @test_store_16(i16* nocapture %addr, i16 %value) { ; ALL32-LABEL: test_store_16: ; ALL32: # BB#0: # %entry -; ALL32-NEXT: movw %si, (%rdi) ; ALL32-NEXT: movl %esi, %eax +; ALL32-NEXT: movw %ax, (%rdi) +; ALL32-NEXT: # kill: %AX %AX %EAX ; ALL32-NEXT: retq ; ; ALL64-LABEL: test_store_16: @@ -58,11 +59,11 @@ ; SSE64-NEXT: movdqu %xmm0, (%eax) ; SSE64-NEXT: retl ; -; AVXONLY32-LABEL: test_store_4xi32: -; AVXONLY32: # BB#0: -; AVXONLY32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVXONLY32-NEXT: vmovdqu %xmm0, (%rdi) -; AVXONLY32-NEXT: retq +; AVX32-LABEL: test_store_4xi32: +; AVX32: # BB#0: +; AVX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX32-NEXT: vmovdqu %xmm0, (%rdi) +; AVX32-NEXT: retq ; ; AVX64-LABEL: test_store_4xi32: ; AVX64: # BB#0: @@ -70,18 +71,6 @@ ; AVX64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX64-NEXT: vmovdqu %xmm0, (%eax) ; AVX64-NEXT: retl -; -; KNL32-LABEL: test_store_4xi32: -; KNL32: # BB#0: -; KNL32-NEXT: vpaddd %xmm1, 
%xmm0, %xmm0 -; KNL32-NEXT: vmovdqu %xmm0, (%rdi) -; KNL32-NEXT: retq -; -; SKX32-LABEL: test_store_4xi32: -; SKX32: # BB#0: -; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX32-NEXT: vmovdqu %xmm0, (%rdi) -; SKX32-NEXT: retq %foo = add <4 x i32> %value, %value2 ; to force integer type on store store <4 x i32> %foo, <4 x i32>* %addr, align 1 ret <4 x i32> %foo @@ -101,11 +90,11 @@ ; SSE64-NEXT: movdqa %xmm0, (%eax) ; SSE64-NEXT: retl ; -; AVXONLY32-LABEL: test_store_4xi32_aligned: -; AVXONLY32: # BB#0: -; AVXONLY32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVXONLY32-NEXT: vmovdqa %xmm0, (%rdi) -; AVXONLY32-NEXT: retq +; AVX32-LABEL: test_store_4xi32_aligned: +; AVX32: # BB#0: +; AVX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX32-NEXT: vmovdqa %xmm0, (%rdi) +; AVX32-NEXT: retq ; ; AVX64-LABEL: test_store_4xi32_aligned: ; AVX64: # BB#0: @@ -113,18 +102,6 @@ ; AVX64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX64-NEXT: vmovdqa %xmm0, (%eax) ; AVX64-NEXT: retl -; -; KNL32-LABEL: test_store_4xi32_aligned: -; KNL32: # BB#0: -; KNL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; KNL32-NEXT: vmovdqa %xmm0, (%rdi) -; KNL32-NEXT: retq -; -; SKX32-LABEL: test_store_4xi32_aligned: -; SKX32: # BB#0: -; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX32-NEXT: vmovdqa %xmm0, (%rdi) -; SKX32-NEXT: retq %foo = add <4 x i32> %value, %value2 ; to force integer type on store store <4 x i32> %foo, <4 x i32>* %addr, align 16 ret <4 x i32> %foo Index: test/CodeGen/X86/fixup-bw-copy.ll =================================================================== --- test/CodeGen/X86/fixup-bw-copy.ll +++ test/CodeGen/X86/fixup-bw-copy.ll @@ -7,15 +7,11 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" define i8 @test_movb(i8 %a0) { -; BWON64-LABEL: test_movb: -; BWON64: # BB#0: -; BWON64-NEXT: movl %edi, %eax -; BWON64-NEXT: retq -; -; BWOFF64-LABEL: test_movb: -; BWOFF64: # BB#0: -; BWOFF64-NEXT: movb %dil, %al -; BWOFF64-NEXT: retq +; X64-LABEL: test_movb: +; X64: # BB#0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX +; X64-NEXT: retq ; ; X32-LABEL: test_movb: ; X32: # BB#0: @@ -25,15 +21,11 @@ } define i16 @test_movw(i16 %a0) { -; BWON64-LABEL: test_movw: -; BWON64: # BB#0: -; BWON64-NEXT: movl %edi, %eax -; BWON64-NEXT: retq -; -; BWOFF64-LABEL: test_movw: -; BWOFF64: # BB#0: -; BWOFF64-NEXT: movw %di, %ax -; BWOFF64-NEXT: retq +; X64-LABEL: test_movw: +; X64: # BB#0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AX %AX %EAX +; X64-NEXT: retq ; ; BWON32-LABEL: test_movw: ; BWON32: # BB#0: Index: test/CodeGen/X86/fma-fneg-combine.ll =================================================================== --- test/CodeGen/X86/fma-fneg-combine.ll +++ test/CodeGen/X86/fma-fneg-combine.ll @@ -163,16 +163,16 @@ define <4 x float> @test11b(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 { ; SKX-LABEL: test11b: ; SKX: # BB#0: # %entry -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm1 {%k1} ; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm0 {%k1} ; SKX-NEXT: retq ; ; KNL-LABEL: test11b: ; KNL: # BB#0: # %entry -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm1 {%k1} ; KNL-NEXT: vmovaps %xmm1, %xmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm0 {%k1} ; KNL-NEXT: retq entry: %sub.i = fsub <4 x float> , %c Index: test/CodeGen/X86/fold-vector-sext-crash2.ll =================================================================== --- 
test/CodeGen/X86/fold-vector-sext-crash2.ll +++ test/CodeGen/X86/fold-vector-sext-crash2.ll @@ -28,14 +28,14 @@ ; ; X64-LABEL: test_sext1: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $-1, 56(%rdi) -; X64-NEXT: movq $-1, 48(%rdi) -; X64-NEXT: movq $-1, 40(%rdi) -; X64-NEXT: movq $-99, 32(%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $-1, 56(%rax) +; X64-NEXT: movq $-1, 48(%rax) +; X64-NEXT: movq $-1, 40(%rax) +; X64-NEXT: movq $-99, 32(%rax) ; X64-NEXT: retq %Se = sext <2 x i8> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -66,14 +66,14 @@ ; ; X64-LABEL: test_sext2: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $-1, 56(%rdi) -; X64-NEXT: movq $-1, 48(%rdi) -; X64-NEXT: movq $-1, 40(%rdi) -; X64-NEXT: movq $-1999, 32(%rdi) # imm = 0xF831 ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $-1, 56(%rax) +; X64-NEXT: movq $-1, 48(%rax) +; X64-NEXT: movq $-1, 40(%rax) +; X64-NEXT: movq $-1999, 32(%rax) # imm = 0xF831 ; X64-NEXT: retq %Se = sext <2 x i128> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -104,13 +104,13 @@ ; ; X64-LABEL: test_zext1: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $0, 40(%rdi) -; X64-NEXT: movq $254, 32(%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 48(%rax) +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $0, 40(%rax) +; X64-NEXT: movq $254, 32(%rax) ; X64-NEXT: retq %Se = zext <2 x i8> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -141,13 +141,13 @@ ; ; X64-LABEL: test_zext2: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $-1, 40(%rdi) -; X64-NEXT: movq $-2, 32(%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 48(%rax) +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $-1, 40(%rax) +; X64-NEXT: movq $-2, 32(%rax) ; X64-NEXT: retq %Se = zext <2 x i128> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> Index: test/CodeGen/X86/ghc-cc64.ll =================================================================== --- test/CodeGen/X86/ghc-cc64.ll +++ test/CodeGen/X86/ghc-cc64.ll @@ -22,8 +22,8 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: - ; CHECK: movq %rdi, %r13 - ; CHECK-NEXT: movq %rsi, %rbp + ; CHECK: movq %rsi, %rbp + ; CHECK-NEXT: movq %rdi, %r13 ; CHECK-NEXT: callq addtwo %0 = call ghccc i64 @addtwo(i64 %a, i64 %b) ; CHECK: callq foo Index: test/CodeGen/X86/hipe-cc64.ll =================================================================== --- test/CodeGen/X86/hipe-cc64.ll +++ test/CodeGen/X86/hipe-cc64.ll @@ -4,11 +4,10 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: - ; CHECK: movq %rsi, %rax + ; CHECK: movq %rsi, %rdx ; CHECK-NEXT: movl $8, %ecx ; CHECK-NEXT: movl $9, %r8d ; CHECK-NEXT: 
movq %rdi, %rsi - ; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: callq addfour %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) %res = extractvalue {i64, i64, i64} %0, 2 Index: test/CodeGen/X86/iabs.ll =================================================================== --- test/CodeGen/X86/iabs.ll +++ test/CodeGen/X86/iabs.ll @@ -22,10 +22,11 @@ ; X64-LABEL: test_i8: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: sarb $7, %al -; X64-NEXT: addb %al, %dil -; X64-NEXT: xorb %al, %dil -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: sarb $7, %cl +; X64-NEXT: addb %cl, %al +; X64-NEXT: xorb %cl, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %tmp1neg = sub i8 0, %a %b = icmp sgt i8 %a, -1 Index: test/CodeGen/X86/imul.ll =================================================================== --- test/CodeGen/X86/imul.ll +++ test/CodeGen/X86/imul.ll @@ -158,16 +158,16 @@ define i32 @mul4294967295_32(i32 %A) { ; X64-LABEL: mul4294967295_32: -; X64: negl %edi -; X64-NEXT: movl %edi, %eax +; X64: movl %edi, %eax +; X64-NEXT: negl %eax %mul = mul i32 %A, 4294967295 ret i32 %mul } define i64 @mul18446744073709551615_64(i64 %A) { ; X64-LABEL: mul18446744073709551615_64: -; X64: negq %rdi -; X64-NEXT: movq %rdi, %rax +; X64: movq %rdi, %rax +; X64-NEXT: negq %rax %mul = mul i64 %A, 18446744073709551615 ret i64 %mul } Index: test/CodeGen/X86/ipra-local-linkage.ll =================================================================== --- test/CodeGen/X86/ipra-local-linkage.ll +++ test/CodeGen/X86/ipra-local-linkage.ll @@ -24,7 +24,7 @@ call void @foo() ; CHECK-LABEL: bar: ; CHECK: callq foo - ; CHECK-NEXT: movl %eax, %r15d + ; CHECK-NEXT: movl %edi, %r15d call void asm sideeffect "movl $0, %r12d", "{r15}~{r12}"(i32 %X) ret void } Index: test/CodeGen/X86/legalize-shift-64.ll =================================================================== --- test/CodeGen/X86/legalize-shift-64.ll +++ test/CodeGen/X86/legalize-shift-64.ll @@ -88,6 +88,8 @@ ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -101,12 +103,11 @@ ; CHECK-NEXT: movl %edi, %esi ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl %edx, %ebx -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movb %ch, %cl ; CHECK-NEXT: shll %cl, %ebx ; CHECK-NEXT: shldl %cl, %edx, %ebp -; CHECK-NEXT: testb $32, %cl +; CHECK-NEXT: testb $32, %ch ; CHECK-NEXT: je .LBB4_4 ; CHECK-NEXT: # BB#3: ; CHECK-NEXT: movl %ebx, %ebp Index: test/CodeGen/X86/legalize-shl-vec.ll =================================================================== --- test/CodeGen/X86/legalize-shl-vec.ll +++ test/CodeGen/X86/legalize-shl-vec.ll @@ -26,12 +26,12 @@ ; ; X64-LABEL: test_shl: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 32(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 48(%rax) +; X64-NEXT: movaps %xmm0, 32(%rax) +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) ; X64-NEXT: retq %Amt = insertelement <2 x i256> undef, i256 -1, i32 0 %Out = shl <2 x i256> %In, %Amt @@ -62,12 
+62,12 @@ ; ; X64-LABEL: test_srl: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 32(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 48(%rax) +; X64-NEXT: movaps %xmm0, 32(%rax) +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) ; X64-NEXT: retq %Amt = insertelement <2 x i256> undef, i256 -1, i32 0 %Out = lshr <2 x i256> %In, %Amt @@ -108,19 +108,19 @@ ; ; X64-LABEL: test_sra: ; X64: # BB#0: -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; X64-NEXT: sarq $63, %r8 -; X64-NEXT: movq %rdx, 56(%rdi) -; X64-NEXT: movq %rcx, 48(%rdi) -; X64-NEXT: movq %rax, 40(%rdi) -; X64-NEXT: movq %r9, 32(%rdi) -; X64-NEXT: movq %r8, 24(%rdi) -; X64-NEXT: movq %r8, 16(%rdi) -; X64-NEXT: movq %r8, 8(%rdi) -; X64-NEXT: movq %r8, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, 56(%rax) +; X64-NEXT: movq %rdx, 48(%rax) +; X64-NEXT: movq %rcx, 40(%rax) +; X64-NEXT: movq %r9, 32(%rax) +; X64-NEXT: movq %r8, 24(%rax) +; X64-NEXT: movq %r8, 16(%rax) +; X64-NEXT: movq %r8, 8(%rax) +; X64-NEXT: movq %r8, (%rax) ; X64-NEXT: retq %Amt = insertelement <2 x i256> undef, i256 -1, i32 0 %Out = ashr <2 x i256> %In, %Amt Index: test/CodeGen/X86/machine-combiner-int.ll =================================================================== --- test/CodeGen/X86/machine-combiner-int.ll +++ test/CodeGen/X86/machine-combiner-int.ll @@ -62,10 +62,11 @@ define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ands_i8: ; CHECK: # BB#0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: andb %cl, %dl -; CHECK-NEXT: andb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: andb %dil, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = and i8 %x2, %t0 @@ -78,10 +79,10 @@ define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ands_i32: ; CHECK: # BB#0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: andl %ecx, %edx -; CHECK-NEXT: andl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: andl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = and i32 %x2, %t0 @@ -92,10 +93,10 @@ define i64 @reassociate_ands_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ands_i64: ; CHECK: # BB#0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: andq %rcx, %rdx -; CHECK-NEXT: andq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: andq %rcx, %rax +; CHECK-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = and i64 %x2, %t0 @@ -109,10 +110,11 @@ define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ors_i8: ; CHECK: # BB#0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: orb %cl, %dl -; CHECK-NEXT: orb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: orb %dil, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = or i8 %x2, %t0 @@ -125,10 +127,10 @@ define i32 @reassociate_ors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ors_i32: ; CHECK: # BB#0: -; 
CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: orl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: orl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = or i32 %x2, %t0 @@ -139,10 +141,10 @@ define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ors_i64: ; CHECK: # BB#0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: orq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: orq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = or i64 %x2, %t0 @@ -156,10 +158,11 @@ define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_xors_i8: ; CHECK: # BB#0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: xorb %cl, %dl -; CHECK-NEXT: xorb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: xorb %cl, %al +; CHECK-NEXT: xorb %dil, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = xor i8 %x2, %t0 @@ -172,10 +175,10 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_xors_i32: ; CHECK: # BB#0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: xorl %ecx, %edx -; CHECK-NEXT: xorl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: xorl %ecx, %eax +; CHECK-NEXT: xorl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = xor i32 %x2, %t0 @@ -186,10 +189,10 @@ define i64 @reassociate_xors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_xors_i64: ; CHECK: # BB#0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: xorq %rcx, %rdx -; CHECK-NEXT: xorq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: xorq %rcx, %rax +; CHECK-NEXT: xorq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = xor i64 %x2, %t0 Index: test/CodeGen/X86/machine-cse.ll =================================================================== --- test/CodeGen/X86/machine-cse.ll +++ test/CodeGen/X86/machine-cse.ll @@ -133,24 +133,24 @@ define i8* @bsd_memchr(i8* %s, i32 %a, i32 %c, i64 %n) nounwind ssp { ; CHECK-LABEL: bsd_memchr: ; CHECK: # BB#0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: testq %rcx, %rcx ; CHECK-NEXT: je .LBB3_4 ; CHECK-NEXT: # BB#1: # %preheader -; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: movzbl %dl, %edx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB3_2: # %do.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpl %eax, %esi +; CHECK-NEXT: cmpl %edx, %esi ; CHECK-NEXT: je .LBB3_5 ; CHECK-NEXT: # BB#3: # %do.cond ; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: incq %rdi +; CHECK-NEXT: incq %rax ; CHECK-NEXT: decq %rcx ; CHECK-NEXT: jne .LBB3_2 ; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: .LBB3_5: # %return -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq entry: %cmp = icmp eq i64 %n, 0 Index: test/CodeGen/X86/mask-negated-bool.ll =================================================================== --- test/CodeGen/X86/mask-negated-bool.ll +++ test/CodeGen/X86/mask-negated-bool.ll @@ -4,8 +4,8 @@ define i32 @mask_negated_zext_bool1(i1 %x) { ; CHECK-LABEL: mask_negated_zext_bool1: ; CHECK: # BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %ext = zext i1 %x to i32 %neg = sub i32 0, 
%ext @@ -38,8 +38,8 @@ define i32 @mask_negated_sext_bool1(i1 %x) { ; CHECK-LABEL: mask_negated_sext_bool1: ; CHECK: # BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %ext = sext i1 %x to i32 %neg = sub i32 0, %ext Index: test/CodeGen/X86/misched-matmul.ll =================================================================== --- test/CodeGen/X86/misched-matmul.ll +++ test/CodeGen/X86/misched-matmul.ll @@ -10,7 +10,7 @@ ; more complex cases. ; ; CHECK: @wrap_mul4 -; CHECK: 23 regalloc - Number of spills inserted +; CHECK: 25 regalloc - Number of spills inserted define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 { entry: Index: test/CodeGen/X86/mul-constant-i16.ll =================================================================== --- test/CodeGen/X86/mul-constant-i16.ll +++ test/CodeGen/X86/mul-constant-i16.ll @@ -11,6 +11,7 @@ ; X64-LABEL: test_mul_by_1: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 1 ret i16 %mul @@ -296,8 +297,9 @@ ; ; X64-LABEL: test_mul_by_16: ; X64: # BB#0: -; X64-NEXT: shll $4, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $4, %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 16 ret i16 %mul @@ -633,8 +635,9 @@ ; ; X64-LABEL: test_mul_by_32: ; X64: # BB#0: -; X64-NEXT: shll $5, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 32 ret i16 %mul Index: test/CodeGen/X86/mul-constant-i32.ll =================================================================== --- test/CodeGen/X86/mul-constant-i32.ll +++ test/CodeGen/X86/mul-constant-i32.ll @@ -780,14 +780,14 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # BB#0: -; X64-JAG-NEXT: shll $4, %edi # sched: [1:0.50] ; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_16: @@ -798,26 +798,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # BB#0: -; JAG-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_16: ; X64-SLM: # BB#0: -; X64-SLM-NEXT: shll $4, %edi # sched: [1:1.00] ; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_16: ; SLM-NOOPT: # BB#0: -; SLM-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shll $4, %eax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 16 ret i32 %mul @@ -1626,14 +1626,14 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50] ; X64-HSW-NEXT: movl %edi, %eax # 
sched: [1:0.25] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # BB#0: -; X64-JAG-NEXT: shll $5, %edi # sched: [1:0.50] ; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_32: @@ -1644,26 +1644,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # BB#0: -; JAG-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_32: ; X64-SLM: # BB#0: -; X64-SLM-NEXT: shll $5, %edi # sched: [1:1.00] ; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_32: ; SLM-NOOPT: # BB#0: -; SLM-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shll $5, %eax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 32 ret i32 %mul Index: test/CodeGen/X86/mul-constant-i64.ll =================================================================== --- test/CodeGen/X86/mul-constant-i64.ll +++ test/CodeGen/X86/mul-constant-i64.ll @@ -802,14 +802,14 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # BB#0: -; X64-JAG-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_16: @@ -822,26 +822,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # BB#0: -; JAG-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_16: ; X64-SLM: # BB#0: -; X64-SLM-NEXT: shlq $4, %rdi # sched: [1:1.00] ; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_16: ; SLM-NOOPT: # BB#0: -; SLM-NOOPT-NEXT: shlq $4, %rdi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shlq $4, %rax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 16 ret i64 %mul @@ -1707,14 +1707,14 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # 
BB#0: -; X64-JAG-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_32: @@ -1727,26 +1727,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # BB#0: -; JAG-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_32: ; X64-SLM: # BB#0: -; X64-SLM-NEXT: shlq $5, %rdi # sched: [1:1.00] ; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_32: ; SLM-NOOPT: # BB#0: -; SLM-NOOPT-NEXT: shlq $5, %rdi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shlq $5, %rax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 32 ret i64 %mul Index: test/CodeGen/X86/mul-i1024.ll =================================================================== --- test/CodeGen/X86/mul-i1024.ll +++ test/CodeGen/X86/mul-i1024.ll @@ -13,7 +13,7 @@ ; X32-NEXT: subl $996, %esp # imm = 0x3E4 ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 32(%eax), %eax -; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -192(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ebx @@ -23,8 +23,8 @@ ; X32-NEXT: movl %eax, -440(%ebp) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl %edx, -884(%ebp) # 4-byte Spill @@ -32,42 +32,42 @@ ; X32-NEXT: movl %eax, -416(%ebp) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -400(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -324(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -320(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: movl %edx, %eax ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -212(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -892(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 36(%eax), %eax -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -240(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %edi ; X32-NEXT: movl %edi, -304(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -76(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: adcl $0, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill ; X32-NEXT: movl 36(%esi), %eax -; X32-NEXT: movl %eax, -316(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -324(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; 
X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl -400(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %edx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -176(%ebp) # 4-byte Spill ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -656(%ebp) # 4-byte Spill ; X32-NEXT: leal (%ebx,%edi), %eax @@ -81,13 +81,13 @@ ; X32-NEXT: movl %eax, -640(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -112(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %edi, -108(%ebp) # 4-byte Spill +; X32-NEXT: adcl %esi, -60(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl %esi, %ebx -; X32-NEXT: setb -160(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -156(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl (%eax), %eax -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi @@ -97,37 +97,37 @@ ; X32-NEXT: movl %eax, -348(%ebp) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -320(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -316(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill ; X32-NEXT: addl %esi, %eax ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl %edx, -428(%ebp) # 4-byte Spill ; X32-NEXT: movl (%ecx), %eax -; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -256(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -264(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -260(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %edx, %eax ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -764(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %esi, %ecx ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: movl %ebx, -424(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %ebx -; X32-NEXT: movl %ebx, -256(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -220(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %ebx, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl -176(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, 
-224(%ebp) # 4-byte Folded Spill ; X32-NEXT: setb -388(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 4(%eax), %eax @@ -144,44 +144,44 @@ ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %bh ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill ; X32-NEXT: movzbl %bh, %eax ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl %edi, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 8(%eax), %eax -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -244(%ebp) # 4-byte Spill ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -112(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 52(%eax), %eax ; X32-NEXT: movl %eax, -340(%ebp) # 4-byte Spill ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movl %edi, -192(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -196(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movl %ecx, %edi ; X32-NEXT: setb %cl ; X32-NEXT: addl %eax, %esi ; X32-NEXT: movzbl %cl, %eax ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 56(%eax), %eax ; X32-NEXT: movl %eax, -408(%ebp) # 4-byte Spill @@ -193,26 +193,26 @@ ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %edi ; X32-NEXT: addl %esi, %ebx -; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill -; X32-NEXT: adcl -216(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -268(%ebp) # 4-byte Spill +; X32-NEXT: adcl -220(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -20(%ebp) # 4-byte Spill ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -616(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -612(%ebp) # 4-byte Spill -; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -184(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, 
-64(%ebp) # 4-byte Spill -; X32-NEXT: movzbl -160(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl -124(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -188(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill +; X32-NEXT: movzbl -156(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 40(%eax), %eax ; X32-NEXT: movl %eax, -352(%ebp) # 4-byte Spill @@ -221,47 +221,47 @@ ; X32-NEXT: movl %eax, -364(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %ebx, -396(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %edx # 4-byte Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %eax, %edi ; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: addl %esi, %edi -; X32-NEXT: movl %edi, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -152(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -48(%ebp) # 4-byte Spill +; X32-NEXT: adcl -124(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill ; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill ; X32-NEXT: seto %al ; X32-NEXT: lahf ; X32-NEXT: movl %eax, %eax ; X32-NEXT: movl %eax, -456(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %ecx ; X32-NEXT: movl 16(%ecx), %eax -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -312(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill ; X32-NEXT: movl 20(%ecx), %eax -; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movl %ebx, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -160(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx @@ -272,70 +272,70 @@ ; X32-NEXT: movl %eax, -284(%ebp) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -308(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %ebx ; X32-NEXT: addl %eax, %ebx -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl 
-88(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -32(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, %edx -; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl %edi, -116(%ebp) # 4-byte Spill ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -84(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -768(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -776(%ebp) # 4-byte Spill -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -772(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %ebx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -52(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -780(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %edx # 4-byte Reload ; X32-NEXT: movl %edx, %eax ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -332(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -648(%ebp) # 4-byte Spill -; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -268(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -32(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -644(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -572(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 20(%eax), %eax -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl -180(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %edi, %esi -; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx @@ -347,37 +347,37 @@ ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx ; X32-NEXT: movl %eax, -280(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -312(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -308(%ebp) # 4-byte 
Spill ; X32-NEXT: movl %edi, %edx ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl -308(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: movl %ebx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -24(%ebp) # 4-byte Spill ; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -596(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -464(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -536(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 4(%eax), %eax -; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movl %esi, -276(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %edi @@ -389,20 +389,20 @@ ; X32-NEXT: movl %eax, -432(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 8(%eax), %eax -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -264(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %esi ; X32-NEXT: movl %esi, %eax ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %ecx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill ; X32-NEXT: adcl -432(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %esi, %edx ; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload @@ -418,47 +418,47 @@ ; X32-NEXT: popl %eax ; X32-NEXT: movl %edx, -736(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %edx -; X32-NEXT: adcl -120(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -532(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ecx, -172(%ebp) # 4-byte Spill -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -168(%ebp) # 4-byte Spill +; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -592(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %edx ; X32-NEXT: movl 
%edx, %eax ; X32-NEXT: movl -116(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -84(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, -368(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -620(%ebp) # 4-byte Spill -; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -40(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -788(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -52(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -784(%ebp) # 4-byte Spill -; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -100(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -212(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -804(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -820(%ebp) # 4-byte Spill -; X32-NEXT: movl -180(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -116(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %eax @@ -466,52 +466,52 @@ ; X32-NEXT: movl %esi, -576(%ebp) # 4-byte Spill ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, -540(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -800(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -796(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -52(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -792(%ebp) # 4-byte Spill -; X32-NEXT: movl -220(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -224(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -224(%ebp) # 4-byte Spill ; X32-NEXT: movzbl -388(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -240(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -376(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 40(%eax), %eax -; X32-NEXT: movl %eax, -236(%ebp) 
# 4-byte Spill +; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %edi ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -212(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: addl %esi, %edi ; X32-NEXT: adcl -376(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -468(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -76(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -816(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %edi, -372(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -812(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %edx, -292(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -808(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -512(%ebp) # 4-byte Spill ; X32-NEXT: movl -276(%ebp), %eax # 4-byte Reload @@ -521,10 +521,10 @@ ; X32-NEXT: lahf ; X32-NEXT: movl %eax, %eax ; X32-NEXT: movl %eax, -740(%ebp) # 4-byte Spill -; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -624(%ebp) # 4-byte Spill -; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -628(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %esi @@ -536,14 +536,14 @@ ; X32-NEXT: movl %ebx, -336(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl 52(%esi), %eax -; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %edi, %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %ebx, %esi -; X32-NEXT: movl %esi, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -208(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx @@ -551,37 +551,37 @@ ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 56(%eax), %eax -; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -360(%ebp) # 4-byte Spill ; X32-NEXT: movl -336(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: movl %edi, %edx -; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -172(%ebp) # 4-byte Spill ; X32-NEXT: adcl -360(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movl %ebx, 
-472(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: movl %edi, -436(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -336(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -824(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, -588(%ebp) # 4-byte Spill ; X32-NEXT: movl -276(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -208(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -632(%ebp) # 4-byte Spill -; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -828(%ebp) # 4-byte Spill -; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -636(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax @@ -596,14 +596,14 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl %edx, -480(%ebp) # 4-byte Spill ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl -84(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -920(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: adcl -384(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -932(%ebp) # 4-byte Spill @@ -629,10 +629,10 @@ ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, -528(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -524(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %edx, %eax -; X32-NEXT: movl -264(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -976(%ebp) # 4-byte Spill ; X32-NEXT: movl 64(%ecx), %eax @@ -647,44 +647,44 @@ ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: movl %ecx, -992(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl -180(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -1008(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: movl -336(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %edi, %eax -; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -172(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -832(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -672(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -208(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -836(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: 
movl -44(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -472(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -840(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -436(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -844(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -164(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl -176(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -680(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -76(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -856(%ebp) # 4-byte Spill -; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -268(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -372(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -852(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -848(%ebp) # 4-byte Spill -; X32-NEXT: movl -44(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload ; X32-NEXT: pushl %eax @@ -694,22 +694,22 @@ ; X32-NEXT: popl %eax ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -860(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -864(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -176(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -172(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -868(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -684(%ebp) # 4-byte Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -208(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -876(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -472(%ebp), %ebx # 4-byte Reload @@ -719,27 +719,27 @@ ; X32-NEXT: movl -436(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -880(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -888(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -688(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -208(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -900(%ebp) # 4-byte 
Spill -; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -268(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -896(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -904(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 68(%eax), %eax -; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi @@ -773,14 +773,14 @@ ; X32-NEXT: movl -480(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, -692(%ebp) # 4-byte Spill -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl -652(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movl %esi, -908(%ebp) # 4-byte Spill -; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, -916(%ebp) # 4-byte Spill -; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -52(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: movl %esi, -912(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %esi # 4-byte Reload @@ -788,9 +788,9 @@ ; X32-NEXT: movl %esi, -696(%ebp) # 4-byte Spill ; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -652(%ebp) # 4-byte Spill -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -924(%ebp) # 4-byte Spill -; X32-NEXT: adcl -60(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -928(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %ecx ; X32-NEXT: movl %ecx, %eax @@ -832,21 +832,21 @@ ; X32-NEXT: movl -660(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, -940(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl %edx, -944(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %edx -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: movl %edi, -936(%ebp) # 4-byte Spill ; X32-NEXT: movl -116(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %ebx, %edi ; X32-NEXT: movl %edi, -708(%ebp) # 4-byte Spill -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -660(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -952(%ebp) # 4-byte Spill -; X32-NEXT: adcl -56(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -52(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -956(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 84(%eax), %eax @@ -883,15 +883,15 @@ ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: movl %esi, %edx ; X32-NEXT: movl %edx, -728(%ebp) # 4-byte Spill -; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -132(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -712(%ebp) # 4-byte Spill ; X32-NEXT: movl -668(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -276(%ebp), %ecx # 4-byte Folded 
Reload ; X32-NEXT: movl %ecx, -968(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: adcl -240(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -272(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -964(%ebp) # 4-byte Spill -; X32-NEXT: adcl -172(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -168(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -972(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 68(%eax), %eax @@ -927,36 +927,36 @@ ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl %edx, %eax -; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -132(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -716(%ebp) # 4-byte Spill ; X32-NEXT: movl -664(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi ; X32-NEXT: adcl -276(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -988(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %esi -; X32-NEXT: adcl -240(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -272(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -984(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %esi -; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -168(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -980(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl -180(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %esi ; X32-NEXT: movl %esi, -720(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -664(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: movl %edi, -996(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: movl %ecx, -1000(%ebp) # 4-byte Spill ; X32-NEXT: movl -524(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edx, %eax ; X32-NEXT: movl -528(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -320(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -316(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -1004(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edx, %eax @@ -970,70 +970,70 @@ ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill ; X32-NEXT: addl %eax, %ebx -; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -80(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: addl %edx, %ebx ; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl -76(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl 
-72(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movl -72(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -232(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -236(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -80(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl %edi, -72(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %edi, -84(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl %edi, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, -72(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: setb %dl -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movzbl %dl, %edx ; X32-NEXT: adcl %ebx, %edx ; X32-NEXT: movl %edx, -608(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -28(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -164(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -120(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -60(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -112(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -52(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -56(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -160(%ebp) # 4-byte Spill ; X32-NEXT: adcl -608(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl -80(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -52(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 44(%eax), %eax -; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -1044,7 +1044,7 @@ ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl -364(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl %esi, %ecx ; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill @@ 
-1054,89 +1054,89 @@ ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %esi, %edx ; X32-NEXT: adcl %ecx, %ebx -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -324(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl -320(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -124(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -400(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %edi ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl %esi, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl %esi, -60(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -112(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -108(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -88(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -80(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill ; X32-NEXT: addl -364(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -60(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movzbl -88(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movzbl -80(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -56(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl -132(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -112(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -44(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -272(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -52(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -24(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -320(%ebp), %edx # 4-byte Reload +; X32-NEXT: addl -164(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl -196(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl -108(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -268(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -64(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -20(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill -; X32-NEXT: adcl -60(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill +; X32-NEXT: adcl -56(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -48(%ebp) # 
4-byte Spill ; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: movl %edi, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edi, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -456(%ebp), %ecx # 4-byte Reload ; X32-NEXT: pushl %eax ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addb $127, %al ; X32-NEXT: sahf ; X32-NEXT: popl %eax -; X32-NEXT: adcl -72(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -84(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -608(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -76(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -72(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -760(%ebp) # 4-byte Spill -; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -80(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -756(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -752(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %eax -; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -748(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -52(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -744(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 12(%eax), %eax -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edx ; X32-NEXT: setb %cl ; X32-NEXT: addl %eax, %edx @@ -1145,20 +1145,20 @@ ; X32-NEXT: movl %edi, %esi ; X32-NEXT: addl %esi, %edx ; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl -132(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl -584(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %edi, %ecx ; X32-NEXT: movl -432(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -260(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: movl %esi, -432(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, -456(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl %edi, -432(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -276(%ebp), %edi # 4-byte Reload @@ -1168,45 +1168,45 @@ ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: setb %bl -; 
X32-NEXT: addl -160(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -24(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -156(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -20(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -112(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -180(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -108(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -184(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %edi, %edx -; X32-NEXT: adcl -48(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -36(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -172(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -20(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -272(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -24(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movl %ebx, -584(%ebp) # 4-byte Spill ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, -276(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -240(%ebp) # 4-byte Spill -; X32-NEXT: adcl -112(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -172(%ebp) # 4-byte Spill +; X32-NEXT: adcl -20(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -272(%ebp) # 4-byte Spill +; X32-NEXT: adcl -108(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -168(%ebp) # 4-byte Spill ; X32-NEXT: movl -736(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %eax ; X32-NEXT: addb $127, %al ; X32-NEXT: sahf -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -432(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -456(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, -232(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %edx, -164(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %esi, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl %ebx, -236(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %edx, -160(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %esi, -32(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, -52(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 44(%eax), %eax -; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ebx @@ -1216,7 +1216,7 @@ ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl -304(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %esi @@ -1225,88 +1225,87 @@ ; X32-NEXT: movl %ecx, %edx ; 
X32-NEXT: addl %edx, %esi ; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl -176(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl -376(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -212(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edx ; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl %edi, -376(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -220(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl -224(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -76(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -20(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl %edx, %eax ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %dl ; X32-NEXT: addl -304(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -44(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movzbl %dl, %edx ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl %edi, %ebx +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -176(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl -336(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -200(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -208(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl -472(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -372(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -436(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -292(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill -; X32-NEXT: adcl -36(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -48(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill +; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -40(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -740(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %eax ; X32-NEXT: addb $127, %al ; X32-NEXT: sahf ; X32-NEXT: movl -376(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -432(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -220(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -224(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -456(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl %ebx, -584(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -200(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -208(%ebp), 
%eax # 4-byte Reload ; X32-NEXT: adcl %eax, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, -240(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %esi, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, -272(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %esi, -168(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -640(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %eax ; X32-NEXT: addb $127, %al ; X32-NEXT: sahf -; X32-NEXT: adcl -64(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -60(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill ; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -220(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -224(%ebp) # 4-byte Spill +; X32-NEXT: adcl -80(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -640(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl -48(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -472(%ebp) # 4-byte Spill -; X32-NEXT: adcl -52(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -64(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -436(%ebp) # 4-byte Spill ; X32-NEXT: movl -408(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 60(%eax), %eax -; X32-NEXT: movl %eax, -192(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx @@ -1317,10 +1316,10 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx @@ -1328,17 +1327,17 @@ ; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill ; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -1350,7 +1349,7 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; 
X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -340(%ebp), %edi # 4-byte Reload @@ -1361,30 +1360,30 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -68(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -764(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -36(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -40(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: adcl -44(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -44(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -48(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload @@ -1392,136 +1391,135 @@ ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -164(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -64(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -80(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -76(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -16(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -44(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -16(%ebp) # 4-byte 
Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -76(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill -; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -20(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -24(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -244(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx ; X32-NEXT: movl -16(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -76(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -36(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -44(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -68(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill ; X32-NEXT: adcl -420(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl -616(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -80(%ebp) # 4-byte Spill ; X32-NEXT: adcl -612(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -268(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: 
addl %edi, %eax +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill ; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -616(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %edi, %ebx +; X32-NEXT: addl -124(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -612(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -152(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload +; X32-NEXT: setb -124(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -152(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -124(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -32(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -424(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -44(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill +; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill ; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %edx, -48(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -416(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, -424(%ebp) # 4-byte Spill @@ -1539,103 +1537,102 @@ ; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -320(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %edi # 
4-byte Folded Reload ; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -424(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -420(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -20(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -24(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -44(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -48(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -44(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: adcl -48(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -44(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -48(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -44(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -48(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -364(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -100(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -244(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl -68(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -32(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -196(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %edx # 4-byte Folded 
Reload +; X32-NEXT: adcl -200(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -504(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl -508(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: addl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -64(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -48(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl -60(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill +; X32-NEXT: adcl -40(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -52(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl -64(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -272(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -268(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %edi, %ebx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl -180(%ebp), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx @@ -1643,121 +1640,123 @@ ; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -88(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl -416(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill -; X32-NEXT: movl 
-316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -68(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl -180(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl -68(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -296(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -768(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -40(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -372(%ebp) # 4-byte Spill -; X32-NEXT: adcl -64(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -60(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -416(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: addl -40(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 28(%eax), %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -216(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -372(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -152(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -124(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -68(%ebp), %eax # 
4-byte Folded Reload -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -64(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb -372(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -24(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -20(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl -68(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -364(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -216(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl -372(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi @@ -1767,48 +1766,48 @@ ; X32-NEXT: adcl -776(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -772(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -780(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -36(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, -508(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -20(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -504(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -152(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -44(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %ecx, -124(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -48(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl %ecx, -60(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %eax ; X32-NEXT: 
addl -16(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -80(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -272(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill +; X32-NEXT: adcl -76(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl -80(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -48(%ebp) # 4-byte Spill +; X32-NEXT: adcl -268(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill -; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl -16(%ebp), %ecx # 1-byte Folded Reload @@ -1816,51 +1815,50 @@ ; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -88(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -440(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -440(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -292(%ebp) # 4-byte Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: addl -80(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -372(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -340(%ebp), %edi # 4-byte Reload ; 
X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -88(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -80(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -332(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -448(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -36(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -44(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill -; X32-NEXT: adcl -272(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl -268(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -1868,7 +1866,7 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax @@ -1880,105 +1878,105 @@ ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -216(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -448(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -296(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -76(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -80(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -76(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %esi # 4-byte 
Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -76(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -80(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -76(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -216(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl -272(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl -268(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl -16(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -80(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -332(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: adcl -648(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl -644(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -332(%ebp) # 4-byte Spill ; X32-NEXT: adcl -572(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill ; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -64(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -372(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -80(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -48(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -296(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -68(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -20(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -24(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ebx, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: movl -332(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: addl -32(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -36(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -196(%ebp), %edx 
# 4-byte Folded Reload +; X32-NEXT: adcl -200(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -372(%ebp) # 4-byte Spill ; X32-NEXT: adcl -608(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -80(%ebp) # 4-byte Spill ; X32-NEXT: adcl -760(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -296(%ebp) # 4-byte Spill ; X32-NEXT: movl -756(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -272(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %ecx, -268(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl -752(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl -748(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -332(%ebp) # 4-byte Spill ; X32-NEXT: movl -744(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 28(%eax), %eax @@ -1993,7 +1991,7 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload @@ -2003,23 +2001,23 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl -348(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -572(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload @@ -2029,31 +2027,31 @@ ; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -228(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -232(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -428(%ebp), %edx # 
4-byte Folded Reload -; X32-NEXT: addl -52(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -64(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -24(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -20(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload @@ -2063,78 +2061,78 @@ ; X32-NEXT: movl %eax, -428(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -316(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -196(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -428(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -44(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -20(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -48(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -24(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: setb -64(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -44(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -48(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: 
adcl $0, %edx -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -24(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -20(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -44(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -52(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -64(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -228(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: addl -232(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl -596(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl -464(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -464(%ebp) # 4-byte Spill ; X32-NEXT: adcl -536(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %esi @@ -2145,29 +2143,29 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %bl -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi 
# 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, -648(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx @@ -2181,7 +2179,7 @@ ; X32-NEXT: movl %eax, -644(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -536(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -124(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax @@ -2189,15 +2187,15 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -344(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -452(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -32(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill -; X32-NEXT: adcl -228(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -232(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, -536(%ebp) # 4-byte Spill @@ -2215,122 +2213,122 @@ ; X32-NEXT: movl %eax, -596(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -132(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -452(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -536(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -596(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -24(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte 
Folded Reload -; X32-NEXT: setb -228(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -20(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -64(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -232(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl -36(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -32(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -36(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -156(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -64(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -228(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -232(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -344(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -404(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -532(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl -592(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: addl -572(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl -448(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -196(%ebp), %ebx # 4-byte Folded Reload +; 
X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl -200(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -572(%ebp) # 4-byte Spill ; X32-NEXT: adcl -428(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -464(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -68(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -428(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -228(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -232(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -180(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -88(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -200(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, -532(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -592(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx @@ -2343,7 +2341,7 @@ ; X32-NEXT: movl %eax, -532(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax @@ -2355,9 +2353,9 @@ ; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill ; X32-NEXT: adcl -452(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -328(%ebp) # 4-byte Spill -; 
X32-NEXT: adcl $0, -196(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -228(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl $0, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -232(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx @@ -2370,22 +2368,22 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -48(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -216(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -448(%ebp), %eax # 4-byte Reload @@ -2394,43 +2392,43 @@ ; X32-NEXT: adcl %eax, -452(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -196(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -200(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -232(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb -448(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -196(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -200(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -328(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill ; X32-NEXT: adcl -328(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: setb -328(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl -328(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 
4-byte Reload -; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -156(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -216(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -328(%ebp) # 4-byte Spill -; X32-NEXT: movl -228(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -232(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -196(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -200(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl -448(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi @@ -2440,22 +2438,22 @@ ; X32-NEXT: adcl -620(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -788(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -784(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -52(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, -592(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -24(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -532(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -572(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -428(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -32(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -36(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -452(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -20(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill -; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -196(%ebp) # 4-byte Spill +; X32-NEXT: addl -24(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill +; X32-NEXT: adcl -48(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -200(%ebp) # 4-byte Spill ; X32-NEXT: adcl -464(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -620(%ebp) # 4-byte Spill ; X32-NEXT: adcl -68(%ebp), %eax # 4-byte Folded Reload @@ -2463,10 +2461,10 @@ ; X32-NEXT: setb -464(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -2474,47 +2472,47 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -44(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -48(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -44(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -48(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %ecx # 
4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -88(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill ; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -368(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax @@ -2522,18 +2520,18 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -540(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -576(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -20(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -24(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -576(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -368(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -2541,140 +2539,140 @@ ; X32-NEXT: addl -368(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -368(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl 
-184(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -216(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -316(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -228(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %edi ; X32-NEXT: movl -576(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -24(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -368(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -64(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -48(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: setb -576(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -64(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -24(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -20(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -280(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -216(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -52(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: movzbl -576(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: movl -24(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl -540(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl -800(%ebp), %esi # 4-byte 
Folded Reload -; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl -796(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl -792(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, %edi -; X32-NEXT: movl -32(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -228(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -36(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -232(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -68(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -196(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -200(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -620(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -368(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -328(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movzbl -464(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -48(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: addl -344(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl -404(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -52(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -164(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -56(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill +; X32-NEXT: adcl -72(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -64(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -160(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -52(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -616(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill ; X32-NEXT: movl -68(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -612(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -424(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl -420(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -368(%ebp) # 4-byte Spill ; X32-NEXT: adcl -508(%ebp), 
%eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl -504(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -152(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -64(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill +; X32-NEXT: adcl -124(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl -60(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -464(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -292(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -372(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -296(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -272(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -268(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -332(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -2682,11 +2680,11 @@ ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload @@ -2695,34 +2693,34 @@ ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -176(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -348(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload +; 
X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -72(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax @@ -2730,18 +2728,18 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -468(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -804(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -160(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill +; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -2749,134 +2747,134 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -160(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -164(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -232(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -236(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb 
-72(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -84(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -40(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -32(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -40(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -40(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -32(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -56(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -76(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -72(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -72(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -84(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -468(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill ; X32-NEXT: adcl -816(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill ; X32-NEXT: adcl -812(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -344(%ebp) # 4-byte Spill ; X32-NEXT: adcl -808(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill -; X32-NEXT: 
movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -176(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, -468(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl -468(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -124(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax @@ -2884,15 +2882,15 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -512(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -820(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -196(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -200(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -404(%ebp) # 4-byte Spill ; X32-NEXT: adcl -328(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -72(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -60(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -84(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, 
%ebx ; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill @@ -2903,159 +2901,159 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -468(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -404(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -328(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -200(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -468(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -64(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -72(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -196(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -60(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -84(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -200(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -64(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -60(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -404(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl -404(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: setb -404(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl -404(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl 
-72(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -196(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -200(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -512(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -676(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -624(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -628(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: addl -152(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl -164(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -124(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill +; X32-NEXT: adcl -232(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill +; X32-NEXT: adcl -160(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -628(%ebp) # 4-byte Spill -; X32-NEXT: adcl -232(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -236(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -624(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -72(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -344(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -300(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -232(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -236(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -144(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -140(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull -144(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -140(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -336(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl 
%ecx, -124(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -232(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -300(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, -404(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -540(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -196(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -124(%ebp), %ebx # 4-byte Reload +; X32-NEXT: setb -200(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -120(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -196(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -200(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -588(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -824(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -160(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill -; X32-NEXT: adcl -228(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -232(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -424(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -152(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -232(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl $0, -124(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -236(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx @@ -3066,59 +3064,59 @@ ; X32-NEXT: movl 60(%eax), %esi ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %esi, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -160(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl 
%edx, %esi ; X32-NEXT: movl -420(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -232(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -424(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -196(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -152(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -232(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -232(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -244(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -124(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -236(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: setb -236(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -424(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -152(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -124(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill -; X32-NEXT: adcl -152(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -124(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -152(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -124(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -152(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -124(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx @@ -3126,32 +3124,32 @@ ; X32-NEXT: addl %ecx, %edx ; X32-NEXT: movl -420(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -232(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -236(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -588(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -632(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -828(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -636(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -404(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %eax # 4-byte 
Reload ; X32-NEXT: adcl %eax, -540(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -628(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -232(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -624(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -196(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -56(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: addl -52(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -424(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -72(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -420(%ebp) # 4-byte Spill ; X32-NEXT: adcl -344(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -636(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -632(%ebp) # 4-byte Spill ; X32-NEXT: setb -588(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %ebx # 4-byte Reload @@ -3159,7 +3157,7 @@ ; X32-NEXT: movl -300(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -3167,11 +3165,11 @@ ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload @@ -3181,52 +3179,52 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -336(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill ; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -64(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -216(%ebp), %edi # 
4-byte Reload +; X32-NEXT: setb -60(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -220(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -64(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -60(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -672(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -832(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -76(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -72(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -344(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl -84(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -3234,206 +3232,206 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -316(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -344(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -72(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -84(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -60(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill ; 
X32-NEXT: movl %eax, -344(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -40(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -32(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -76(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -72(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -76(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -72(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl -344(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -56(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -52(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -672(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: adcl -836(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill ; X32-NEXT: adcl -840(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -52(%ebp) # 4-byte Spill ; X32-NEXT: adcl -844(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl -232(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -424(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -152(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -124(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl -420(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -72(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -636(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -64(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -632(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movzbl -588(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx ; X32-NEXT: movl %ebx, -344(%ebp) # 4-byte Spill -; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload ; 
X32-NEXT: adcl $0, %eax -; X32-NEXT: movl -56(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -52(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -72(%ebp) # 4-byte Folded Spill ; X32-NEXT: addl -512(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl -676(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill ; X32-NEXT: adcl -432(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill ; X32-NEXT: adcl -456(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -60(%ebp) # 4-byte Spill ; X32-NEXT: movl -344(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -584(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -276(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -172(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -32(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl -272(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -168(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -36(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, -508(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -68(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -504(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -328(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -368(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -468(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -404(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -540(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl %edx, -232(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -464(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -196(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -232(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl %edx, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -236(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -72(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl -64(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl 
$0, %eax ; X32-NEXT: addl -292(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl -372(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -152(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -124(%ebp) # 4-byte Spill +; X32-NEXT: adcl -80(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill ; X32-NEXT: adcl -296(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl -272(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -60(%ebp) # 4-byte Spill +; X32-NEXT: adcl -268(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -344(%ebp) # 4-byte Spill -; X32-NEXT: movl -40(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -56(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -44(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl -52(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -332(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill -; X32-NEXT: adcl -80(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill +; X32-NEXT: adcl -76(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill ; X32-NEXT: setb -372(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -408(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl -408(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -240(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -144(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -176(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -172(%ebp) 
# 4-byte Spill +; X32-NEXT: movl %esi, -168(%ebp) # 4-byte Spill ; X32-NEXT: movl -440(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -36(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -44(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax @@ -3449,29 +3447,29 @@ ; X32-NEXT: addl -680(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -884(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -276(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -440(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: adcl -272(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -168(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -240(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx +; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload @@ -3479,44 +3477,44 @@ ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -272(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl 
$0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -172(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -76(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -168(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -172(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -172(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -168(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -172(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl -168(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -172(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -168(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -172(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -168(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload @@ -3524,68 +3522,68 @@ ; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -172(%ebp) # 4-byte Spill -; X32-NEXT: movl -80(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl -76(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -20(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -24(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -680(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl -856(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl -852(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -292(%ebp) # 4-byte Spill ; X32-NEXT: adcl -848(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload 
+; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -148(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -144(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -176(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -212(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %edi -; X32-NEXT: movl %edi, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -432(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -88(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi @@ -3594,7 +3592,7 @@ ; X32-NEXT: movl %eax, -456(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -316(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax @@ -3602,18 +3600,18 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -656(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -892(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -44(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl -52(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: adcl -64(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill ; X32-NEXT: 
movl -416(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -3621,56 +3619,56 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -400(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -48(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -80(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -64(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -24(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -20(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -20(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -24(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -88(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill +; X32-NEXT: adcl -36(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -112(%ebp), %eax # 
4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -32(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -36(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl -364(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload @@ -3678,50 +3676,50 @@ ; X32-NEXT: adcl -128(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -24(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -20(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -656(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -700(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -860(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -864(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: addl -272(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: addl -268(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl -296(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill ; X32-NEXT: adcl -276(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -332(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -272(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -368(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -292(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -168(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -300(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx @@ -3729,9 +3727,9 @@ ; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -336(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -36(%ebp) # 4-byte Spill ; 
X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -296(%ebp) # 4-byte Spill ; X32-NEXT: movl -416(%ebp), %ebx # 4-byte Reload @@ -3739,22 +3737,22 @@ ; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -316(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax @@ -3764,13 +3762,13 @@ ; X32-NEXT: adcl -868(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -276(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -512(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -272(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -296(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -416(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill @@ -3781,20 +3779,20 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx @@ -3802,38 +3800,38 @@ ; X32-NEXT: movl -512(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -276(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -68(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -272(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl -296(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb -512(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; 
X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -32(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl -68(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -364(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -204(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi @@ -3841,7 +3839,7 @@ ; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill ; X32-NEXT: movl -296(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl -512(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi @@ -3851,66 +3849,66 @@ ; X32-NEXT: adcl -876(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -872(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -880(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -20(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -272(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -24(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl %ecx, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl %ecx, -268(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -332(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -276(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -368(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %ecx, -272(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -80(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: addl -76(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -296(%ebp) # 4-byte Spill -; X32-NEXT: adcl -36(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl -292(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -172(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -168(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl 
%eax, -68(%ebp) # 4-byte Spill -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -172(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -168(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -172(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -168(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -336(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -392(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -176(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -172(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -412(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: movl %esi, -336(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl %ecx, -176(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -172(%ebp) # 4-byte Spill ; X32-NEXT: movl -440(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -3918,11 +3916,11 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -332(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload @@ -3933,59 +3931,59 @@ ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl -688(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -888(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -36(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: addl -44(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -332(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -336(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -176(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; 
X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -20(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -24(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull -164(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -160(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %edi, -36(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %edi, -44(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -332(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -24(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl -336(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -132(%ebp) # 4-byte Spill -; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -140(%ebp) # 4-byte Spill -; X32-NEXT: setb -176(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl %esi, -164(%ebp) # 4-byte Spill +; X32-NEXT: adcl -172(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -136(%ebp) # 4-byte Spill +; X32-NEXT: setb -172(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, -332(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -336(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi @@ -3998,21 +3996,21 @@ ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -332(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl -332(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -392(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -204(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -336(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -132(%ebp), %ebx # 
4-byte Folded Reload -; X32-NEXT: adcl -140(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -176(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: addl -164(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -136(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movzbl -172(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl -688(%ebp), %ebx # 4-byte Folded Reload @@ -4022,15 +4020,15 @@ ; X32-NEXT: movl %esi, -392(%ebp) # 4-byte Spill ; X32-NEXT: adcl -904(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -412(%ebp) # 4-byte Spill -; X32-NEXT: movl -172(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -296(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -80(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -76(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl -36(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -44(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -292(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -20(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -68(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -80(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx ; X32-NEXT: movl %ebx, -336(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -360(%ebp) # 4-byte Folded Spill @@ -4039,41 +4037,41 @@ ; X32-NEXT: movl -412(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -656(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -168(%ebp) # 4-byte Spill ; X32-NEXT: adcl -700(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl -376(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -220(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -224(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -336(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -640(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -360(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -208(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -472(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -392(%ebp) # 4-byte Spill ; X32-NEXT: adcl -436(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -232(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -236(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -432(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -456(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -48(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -64(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -344(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -272(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -56(%ebp), %eax # 
4-byte Reload +; X32-NEXT: adcl %eax, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -268(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -272(%ebp) # 4-byte Folded Spill ; X32-NEXT: movzbl -372(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, -172(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -168(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %edi, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, -336(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %edx @@ -4085,172 +4083,174 @@ ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -476(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -140(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -136(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -252(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -308(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -216(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -480(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -208(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -228(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -384(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -208(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -476(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl %esi, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl -312(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -476(%ebp), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: 
movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -252(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -40(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movzbl -40(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl -692(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl -32(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: addl -692(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -920(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -132(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -140(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -200(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill +; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: adcl -136(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -208(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -132(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 76(%eax), %edx -; X32-NEXT: movl %edx, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edx +; X32-NEXT: movl 76(%eax), %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, -164(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -132(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -116(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -484(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %esi 
# 4-byte Reload ; X32-NEXT: adcl -488(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: addl %ebx, -140(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -136(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -32(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -200(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -284(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -516(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -200(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -208(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill ; X32-NEXT: movl -284(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -132(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill -; X32-NEXT: adcl -224(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill +; X32-NEXT: adcl -204(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -224(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -204(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -224(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -204(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -308(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -484(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -208(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -228(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -488(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -176(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -172(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -200(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -208(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -56(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -52(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -692(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -172(%ebp) # 4-byte Spill ; X32-NEXT: adcl -908(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -208(%ebp) 
# 4-byte Spill ; X32-NEXT: adcl -916(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -68(%ebp) # 4-byte Spill ; X32-NEXT: adcl -912(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -104(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -476(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -252(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload @@ -4258,15 +4258,15 @@ ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -480(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -384(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -476(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi @@ -4279,11 +4279,11 @@ ; X32-NEXT: addl -436(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -252(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload @@ -4294,26 +4294,26 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -696(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -932(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -76(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -224(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -72(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill +; X32-NEXT: adcl -84(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -204(%ebp) # 4-byte Folded Spill +; 
X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -436(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -72(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -84(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax @@ -4327,92 +4327,92 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -484(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -488(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -80(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -436(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -472(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -56(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -516(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -76(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -72(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl -84(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -72(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -84(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -72(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -84(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -104(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl -484(%ebp), %ebx 
# 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -488(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -224(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -204(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -76(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -72(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -56(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -52(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -696(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -652(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -924(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl -928(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: addl -64(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill -; X32-NEXT: adcl -220(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl -140(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -152(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -60(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill +; X32-NEXT: adcl -224(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl -136(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -124(%ebp) # 4-byte Spill +; X32-NEXT: adcl -32(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -208(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -68(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -548(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -40(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull -544(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %edi, %eax @@ -4421,25 +4421,25 @@ ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload ; 
X32-NEXT: addl -380(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -356(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -548(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %edi @@ -4447,42 +4447,42 @@ ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -92(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -56(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -52(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -704(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -948(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -140(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -220(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl $0, -224(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -60(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -580(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -140(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -136(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 92(%eax), %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -136(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl ; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload @@ -4492,35 +4492,35 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl 
-28(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -600(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -604(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl -292(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -32(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -376(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -220(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -64(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -224(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -60(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: setb -376(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -580(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -220(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -224(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -292(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -140(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl -292(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: setb -292(%ebp) # 1-byte Folded Spill @@ -4529,15 +4529,15 @@ ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl -292(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -104(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -600(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -604(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -64(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -220(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -224(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: movzbl -376(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi @@ -4546,34 +4546,34 @@ ; X32-NEXT: adcl -940(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -944(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -936(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -296(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl 
%eax, -80(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -296(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -32(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -176(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl -200(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -220(%ebp) # 4-byte Spill +; X32-NEXT: addl -172(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -60(%ebp) # 4-byte Spill +; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl -68(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -152(%ebp) # 4-byte Spill -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill +; X32-NEXT: adcl -36(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -124(%ebp) # 4-byte Spill +; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -548(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx @@ -4584,29 +4584,29 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -380(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -216(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -356(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: movl %esi, -380(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl %ecx, -356(%ebp) # 4-byte Spill -; X32-NEXT: movl -212(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl -548(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx @@ -4617,10 +4617,10 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -252(%ebp), 
%esi # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax @@ -4628,32 +4628,32 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -708(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -960(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -176(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -172(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -376(%ebp) # 4-byte Spill -; X32-NEXT: adcl -200(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill +; X32-NEXT: adcl -208(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -380(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -356(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -580(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %edi, %ecx ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -140(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edi, %ebx ; X32-NEXT: addl %esi, %eax @@ -4661,28 +4661,28 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -116(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -600(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -84(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -604(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -376(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl -380(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -116(%ebp) # 4-byte Spill ; X32-NEXT: adcl -356(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill ; X32-NEXT: setb -356(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -580(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, -380(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi @@ -4695,20 +4695,20 @@ ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx ; 
X32-NEXT: setb -380(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl -380(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -600(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -208(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -228(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -604(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -224(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -204(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -84(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movzbl -356(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx @@ -4716,117 +4716,117 @@ ; X32-NEXT: adcl -660(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -952(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -956(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -76(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -72(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -72(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -84(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -68(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -32(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl -36(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %edi, -380(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, -308(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -216(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -228(%ebp) # 4-byte Spill ; X32-NEXT: movl -516(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -116(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -356(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl -116(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: 
movl -132(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -484(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -176(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -488(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -116(%ebp) # 4-byte Spill ; X32-NEXT: movl -476(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -220(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -224(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -100(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -480(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -212(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -384(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: addl -356(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -100(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -84(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %esi, -176(%ebp) # 4-byte Spill +; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -212(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -116(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -476(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, -356(%ebp) # 4-byte Spill -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %ebx # 
4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -32(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -36(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -480(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload @@ -4834,42 +4834,42 @@ ; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -356(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -204(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl -212(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -88(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -480(%ebp) # 4-byte Spill ; X32-NEXT: adcl -116(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -384(%ebp) # 4-byte Spill -; X32-NEXT: setb -204(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -212(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -116(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -176(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -112(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -108(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -84(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -84(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -88(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -484(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload @@ -4880,8 +4880,8 @@ ; X32-NEXT: movl -480(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -116(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -384(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -100(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -204(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, -176(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl -212(%ebp), %eax # 1-byte Folded Reload ; 
X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, -484(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx @@ -4889,34 +4889,34 @@ ; X32-NEXT: movl -548(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %edi, %ecx ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx ; X32-NEXT: imull -544(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -240(%ebp) # 4-byte Spill ; X32-NEXT: movl -580(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -148(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload ; X32-NEXT: imull %edi, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl -204(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -236(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill +; X32-NEXT: addl -212(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -240(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -136(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl %edi, %esi ; X32-NEXT: movl -548(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -4928,48 +4928,48 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -84(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -88(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill -; X32-NEXT: adcl -140(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill +; X32-NEXT: adcl -136(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill ; X32-NEXT: movl -476(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload ; X32-NEXT: imull %eax, %esi -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: addl %esi, %edx -; X32-NEXT: imull -248(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: imull -252(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, -244(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -248(%ebp) # 4-byte Spill ; X32-NEXT: movl -516(%ebp), %eax # 4-byte Reload ; 
X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %esi ; X32-NEXT: movl -300(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %ecx # 4-byte Reload ; X32-NEXT: imull %eax, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl -148(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -144(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -128(%ebp) # 4-byte Spill -; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -132(%ebp) # 4-byte Spill +; X32-NEXT: adcl -248(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -164(%ebp) # 4-byte Spill ; X32-NEXT: movl -476(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi @@ -4977,28 +4977,28 @@ ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl -300(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -248(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb -244(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -248(%ebp) # 4-byte Folded Reload +; X32-NEXT: setb -248(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -244(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -248(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: addl -128(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -132(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -236(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -204(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -164(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -240(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -212(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -304(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -84(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -88(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -116(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -148(%ebp) # 4-byte Spill -; X32-NEXT: adcl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -144(%ebp) # 4-byte Spill +; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill ; X32-NEXT: adcl -484(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -384(%ebp) # 4-byte Spill ; X32-NEXT: adcl -488(%ebp), %edx # 4-byte Folded Reload @@ -5006,13 +5006,13 @@ ; X32-NEXT: movl 8(%ebp), %esi ; X32-NEXT: movl 104(%esi), %ebx ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, -244(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %ebx, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl -152(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, -236(%ebp) # 
4-byte Spill +; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl 108(%esi), %eax -; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi @@ -5023,93 +5023,93 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb -116(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ebx, %edi ; X32-NEXT: movzbl -116(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl -244(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill ; X32-NEXT: addl -28(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -256(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -148(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -136(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %ecx ; X32-NEXT: movl 96(%ecx), %edi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edi, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -132(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -164(%ebp) # 4-byte Spill ; X32-NEXT: movl 100(%ecx), %eax ; X32-NEXT: movl %eax, -116(%ebp) # 4-byte Spill ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl -132(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -164(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb -144(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -140(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -116(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movzbl -144(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -140(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -188(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -256(%ebp), %eax # 
4-byte Reload +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl %esi, %eax -; X32-NEXT: addl -236(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -240(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -28(%ebp) # 4-byte Spill -; X32-NEXT: adcl -204(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -256(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -112(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -140(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -212(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -108(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -136(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -204(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -212(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl ; X32-NEXT: movl -116(%ebp), %eax # 4-byte Reload @@ -5117,41 +5117,41 @@ ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -192(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -236(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -204(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -212(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -112(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -144(%ebp) # 4-byte Spill -; X32-NEXT: adcl -140(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -188(%ebp) # 4-byte Spill -; X32-NEXT: setb -112(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -108(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill +; X32-NEXT: adcl -136(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -192(%ebp) # 4-byte Spill +; X32-NEXT: setb -108(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; 
X32-NEXT: movl -104(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -256(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -256(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -148(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull -96(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -256(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %edi, %eax @@ -5160,40 +5160,40 @@ ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -248(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -128(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -28(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -256(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -112(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -148(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl -108(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl %edi, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -252(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, -128(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %ecx ; X32-NEXT: movl 112(%ecx), %eax -; X32-NEXT: movl %eax, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill ; X32-NEXT: imull %eax, %esi -; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill ; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl 116(%ecx), %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill ; X32-NEXT: imull %eax, %edi ; X32-NEXT: addl %edx, %edi -; X32-NEXT: movl %edi, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -104(%ebp) # 4-byte Spill ; X32-NEXT: movl 120(%ecx), %eax ; X32-NEXT: movl %ecx, %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload ; X32-NEXT: imull %esi, %edi -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill ; X32-NEXT: addl %edi, %edx @@ -5201,28 +5201,28 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: imull %eax, %ebx ; X32-NEXT: addl %edx, %ebx -; X32-NEXT: movl -144(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, -96(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -108(%ebp), %ebx # 4-byte Folded Reload -; 
X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -104(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -144(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -144(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -140(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -104(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull -100(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -104(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -100(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx @@ -5230,34 +5230,34 @@ ; X32-NEXT: movl %eax, -92(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edx ; X32-NEXT: movl %edx, -96(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %edi ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill ; X32-NEXT: addl %edi, %edx ; X32-NEXT: imull -116(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, -284(%ebp) # 4-byte Spill -; X32-NEXT: movl -244(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: movl -212(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %ecx # 4-byte Reload ; X32-NEXT: imull %edi, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl -104(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -104(%ebp) # 4-byte Spill +; X32-NEXT: addl -100(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -100(%ebp) # 4-byte Spill ; X32-NEXT: adcl -284(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -176(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl -84(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, -284(%ebp) # 4-byte Spill @@ -5274,62 +5274,62 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull -116(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %esi, %edx 
-; X32-NEXT: addl -104(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -100(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: addl -100(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -176(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -108(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -168(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -104(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -152(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -92(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -96(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -28(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, %edi -; X32-NEXT: adcl -256(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -148(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: adcl -248(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -252(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -116(%ebp) # 4-byte Spill ; X32-NEXT: adcl -128(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -256(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill ; X32-NEXT: movl -304(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -64(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -220(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -236(%ebp), %edx # 4-byte Reload +; X32-NEXT: addl -60(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -164(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -224(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -240(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl -356(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -204(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -32(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -148(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -36(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -144(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -284(%ebp) # 4-byte Spill -; X32-NEXT: adcl -164(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -160(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl -384(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %edi, -116(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -300(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: addl -76(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -72(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -132(%ebp) # 4-byte Spill -; X32-NEXT: adcl -176(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill -; X32-NEXT: adcl -200(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -204(%ebp) # 4-byte Spill -; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -84(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -164(%ebp) # 4-byte Spill +; X32-NEXT: adcl -172(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -240(%ebp) # 4-byte Spill +; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -284(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl -380(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -140(%ebp) # 4-byte Spill 
-; X32-NEXT: movl -308(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ebx, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -116(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -208(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -256(%ebp) # 4-byte Spill +; X32-NEXT: adcl -228(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -176(%ebp) # 4-byte Spill ; X32-NEXT: movl -492(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -28(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 92(%eax), %eax @@ -5340,11 +5340,11 @@ ; X32-NEXT: addl -28(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload @@ -5353,65 +5353,65 @@ ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -556(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -136(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -132(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -560(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -260(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movl %ecx, -92(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -28(%ebp) # 4-byte Spill ; X32-NEXT: movl -552(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -168(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -152(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -128(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -460(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: setb -152(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -460(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl -152(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -712(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -976(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -108(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: adcl -104(%ebp), %edx # 4-byte Folded Reload -; 
X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill +; X32-NEXT: addl -104(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill +; X32-NEXT: adcl -100(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -92(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -28(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -552(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl -552(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload @@ -5420,25 +5420,25 @@ ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -524(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -156(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -528(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -264(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -108(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -104(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -104(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -100(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: addl -92(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -28(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: setb -28(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -492(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx @@ -5446,10 +5446,10 @@ ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -92(%ebp) # 4-byte Spill ; X32-NEXT: movl -492(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill ; X32-NEXT: adcl -92(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: setb -92(%ebp) # 
1-byte Folded Spill @@ -5459,46 +5459,46 @@ ; X32-NEXT: movzbl -92(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl -556(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -156(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -560(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -264(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -212(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -228(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -208(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -216(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: movzbl -28(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -712(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill ; X32-NEXT: adcl -968(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -216(%ebp) # 4-byte Spill ; X32-NEXT: adcl -964(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -244(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -248(%ebp) # 4-byte Spill ; X32-NEXT: adcl -972(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -248(%ebp) # 4-byte Spill -; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ebx, -252(%ebp) # 4-byte Spill +; X32-NEXT: movl -388(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 76(%eax), %eax ; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill -; X32-NEXT: mull %ecx +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -168(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload @@ -5507,31 +5507,31 @@ ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -564(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -136(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -132(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -568(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -260(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -180(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill ; X32-NEXT: movl -520(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -256(%ebp), %esi # 
4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -308(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -312(%ebp) # 4-byte Spill ; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -152(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -308(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -444(%ebp), %edi # 4-byte Reload @@ -5543,17 +5543,17 @@ ; X32-NEXT: addl -716(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -992(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -92(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -252(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -156(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -244(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -180(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -520(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx @@ -5561,7 +5561,7 @@ ; X32-NEXT: addl -92(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax @@ -5574,91 +5574,91 @@ ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -500(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -156(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -496(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -264(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -252(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -244(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -92(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -156(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -48(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -48(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -180(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload +; 
X32-NEXT: setb -40(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -388(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -156(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -156(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -180(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -180(%ebp) # 4-byte Spill ; X32-NEXT: movl -388(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -156(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -180(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -156(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -180(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -156(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -180(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl -564(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -156(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -568(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -264(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -100(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -48(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -40(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -716(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -988(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -984(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -980(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: addl -148(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill +; X32-NEXT: addl -144(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill ; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -108(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -144(%ebp) # 4-byte Spill -; X32-NEXT: adcl -104(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -188(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -212(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -208(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -244(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -104(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -140(%ebp) # 4-byte Spill +; X32-NEXT: adcl -100(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -192(%ebp) # 
4-byte Spill +; X32-NEXT: adcl $0, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -216(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -248(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -348(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -252(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -388(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl -100(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -220(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload @@ -5667,19 +5667,19 @@ ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -564(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -180(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -184(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -568(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -320(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -316(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movl %ecx, -128(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -144(%ebp) # 4-byte Spill ; X32-NEXT: movl -520(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -348(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill ; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -5687,33 +5687,33 @@ ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl -220(%ebp), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -112(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb -108(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -444(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -112(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl -108(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -720(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -1008(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -108(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -104(%ebp), %eax # 
4-byte Folded Reload ; X32-NEXT: movl %eax, -300(%ebp) # 4-byte Spill -; X32-NEXT: adcl -48(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -112(%ebp) # 4-byte Spill +; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -108(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -128(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -148(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -144(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -520(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -288(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx @@ -5725,7 +5725,7 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl ; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload @@ -5736,23 +5736,23 @@ ; X32-NEXT: movl -500(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -280(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -496(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -308(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl -300(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -108(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -104(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: addl -128(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -112(%ebp) # 1-byte Folded Spill +; X32-NEXT: adcl -144(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: setb -108(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -388(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -288(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx @@ -5775,47 +5775,47 @@ ; X32-NEXT: movl -564(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -280(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -568(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -308(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -148(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx ; X32-NEXT: movl -128(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -112(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -108(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -720(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -664(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -996(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -1000(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload -; 
X32-NEXT: addl %eax, -156(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -180(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -100(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -104(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -108(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -212(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -228(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -144(%ebp) # 4-byte Spill +; X32-NEXT: adcl -216(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -128(%ebp) # 4-byte Spill -; X32-NEXT: adcl -244(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -248(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -144(%ebp) # 4-byte Spill -; X32-NEXT: setb -100(%ebp) # 1-byte Folded Spill +; X32-NEXT: adcl -248(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -252(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -140(%ebp) # 4-byte Spill +; X32-NEXT: setb -148(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -492(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi @@ -5823,32 +5823,32 @@ ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -216(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -248(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -252(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -248(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -252(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -556(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -560(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -184(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl %ecx, -320(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -316(%ebp) # 4-byte Spill ; X32-NEXT: movl -552(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, 
%eax ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -5856,43 +5856,43 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -216(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -188(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -192(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movzbl -188(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -192(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl -724(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -1004(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -212(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -208(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -188(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -180(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -320(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -228(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -216(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -184(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -316(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -552(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -288(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -216(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull -16(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload @@ -5903,31 +5903,31 @@ ; X32-NEXT: movl -524(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -280(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -528(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -308(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %edi, -212(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %edi, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -216(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -180(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -184(%ebp), %esi # 4-byte 
Folded Reload ; X32-NEXT: movl %esi, -524(%ebp) # 4-byte Spill -; X32-NEXT: adcl -320(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -316(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -528(%ebp) # 4-byte Spill -; X32-NEXT: setb -180(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -184(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -492(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -288(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -188(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -320(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -316(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -188(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -192(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull -16(%ebp) # 4-byte Folded Reload @@ -5935,39 +5935,39 @@ ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -188(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -192(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull -16(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -188(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -192(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -556(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -280(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -560(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -308(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -320(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl -524(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -528(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -180(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -184(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl -724(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -668(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -732(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -728(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -248(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -128(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -244(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -212(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -100(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: addl %eax, -252(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -128(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -248(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -216(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl -148(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -320(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -316(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %edi ; 
X32-NEXT: movl %edi, -300(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %esi @@ -5976,205 +5976,204 @@ ; X32-NEXT: movl %ecx, -560(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %ebx ; X32-NEXT: movl 96(%ebx), %ecx -; X32-NEXT: movl %ecx, -312(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, -308(%ebp) # 4-byte Spill +; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -148(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl 100(%ebx), %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: movl %ebx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -148(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb -280(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: movzbl -280(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ecx -; X32-NEXT: movl -312(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -308(%ebp), %eax # 4-byte Reload ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -280(%ebp) # 4-byte Spill -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movl %edi, -188(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -192(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movl %esi, -144(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -312(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -308(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: addl %edi, %ecx +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl 
%eax, -384(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi +; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: movl -120(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -84(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -136(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -132(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi +; X32-NEXT: adcl -260(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl -180(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -148(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -184(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -144(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -280(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -188(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -144(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -192(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -140(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 104(%eax), %ecx -; X32-NEXT: movl %ecx, -180(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ecx, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl -128(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 108(%eax), %edx -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %ebx, -112(%ebp) # 4-byte Spill +; X32-NEXT: movl 108(%eax), %ebx +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx +; X32-NEXT: movl %ebx, -108(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -128(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb -176(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -172(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %esi, %edi -; X32-NEXT: movzbl -176(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -172(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ecx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl -132(%ebp), %esi # 
4-byte Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %edi, %esi ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl -84(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, -148(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl %ecx, -144(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -128(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -188(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -136(%ebp) # 4-byte Spill -; X32-NEXT: adcl -144(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill -; X32-NEXT: setb -84(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -192(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -132(%ebp) # 4-byte Spill +; X32-NEXT: adcl -140(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill +; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl %ebx, %esi ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -280(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -144(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -140(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -112(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -108(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -144(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -140(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -144(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -140(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -176(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -172(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -280(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -264(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -136(%ebp) # 4-byte Spill -; X32-NEXT: movzbl -84(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl -260(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -132(%ebp) # 4-byte Spill +; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl %esi, -160(%ebp) # 4-byte Spill +; 
X32-NEXT: movl %esi, -156(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -264(%ebp) # 4-byte Spill ; X32-NEXT: movl -348(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %ebx, %ecx ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl -180(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull -216(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: imull -220(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -184(%ebp) # 4-byte Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -308(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl -16(%ebp), %esi # 4-byte Reload ; X32-NEXT: imull %edi, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl -264(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -180(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -260(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -184(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload @@ -6188,93 +6187,92 @@ ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -84(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -88(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -348(%ebp) # 4-byte Spill ; X32-NEXT: adcl -16(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -180(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %edx -; X32-NEXT: movl 124(%edx), %ecx -; X32-NEXT: movl -260(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl 12(%ebp), %edi +; X32-NEXT: movl 124(%edi), %ecx +; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl 120(%edx), %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl 120(%edi), %esi ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull -124(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: imull -120(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi ; X32-NEXT: movl 112(%edi), %ebx ; X32-NEXT: movl 116(%edi), %ecx ; X32-NEXT: movl %ecx, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 
4-byte Reload ; X32-NEXT: movl %eax, %edi ; X32-NEXT: imull %ecx, %edi ; X32-NEXT: mull %ebx ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -60(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %ecx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl -216(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: addl -220(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -312(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -308(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl -312(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -308(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull -124(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -120(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb -260(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -256(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -124(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -120(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -260(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -256(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: addl -184(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -60(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -188(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -220(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -288(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -264(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -260(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -348(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -180(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -184(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -280(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -216(%ebp) # 4-byte Spill -; X32-NEXT: adcl -136(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -264(%ebp) # 4-byte Spill -; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill -; X32-NEXT: adcl -268(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %esi, -220(%ebp) # 4-byte Spill +; X32-NEXT: adcl -132(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -260(%ebp) # 4-byte Spill +; X32-NEXT: adcl -156(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: adcl -264(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -288(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -520(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill -; 
X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx @@ -6285,10 +6283,10 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx @@ -6298,53 +6296,53 @@ ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -496(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -156(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill ; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -520(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -124(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -120(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -444(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -500(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -324(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -320(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -496(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -400(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl -60(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -56(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -500(%ebp) # 4-byte Spill -; X32-NEXT: adcl -136(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -132(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -496(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -160(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -156(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -416(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi @@ -6357,54 +6355,54 @@ ; X32-NEXT: addl %edi, 
%eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -136(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -132(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -136(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -132(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -564(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -568(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -500(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -60(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -56(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl -496(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -132(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -160(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -324(%ebp) # 4-byte Spill +; X32-NEXT: addl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -320(%ebp) # 4-byte Spill ; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -400(%ebp) # 4-byte Spill -; X32-NEXT: setb -160(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -156(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -264(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -268(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -264(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull -28(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -260(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -256(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -260(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -256(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -364(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -564(%ebp), %esi # 4-byte Folded Reload @@ -6412,11 +6410,11 @@ ; X32-NEXT: adcl -568(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -16(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -268(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -160(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, -264(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl -156(%ebp), %eax # 1-byte Folded Reload ; 
X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, -364(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx @@ -6439,18 +6437,18 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -192(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %esi # 4-byte Reload ; X32-NEXT: imull %edi, %esi ; X32-NEXT: addl %edx, %esi ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -28(%ebp) # 4-byte Spill ; X32-NEXT: adcl -388(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -192(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -196(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -324(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -320(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -6462,7 +6460,7 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -256(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb %bl ; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload @@ -6472,8 +6470,8 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -28(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -340(%ebp) # 4-byte Spill -; X32-NEXT: adcl -192(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill +; X32-NEXT: adcl -196(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill ; X32-NEXT: movl -416(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload ; X32-NEXT: imull %eax, %edi @@ -6482,7 +6480,7 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill ; X32-NEXT: addl %edi, %edx -; X32-NEXT: imull -316(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: imull -324(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, -492(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload @@ -6492,13 +6490,13 @@ ; X32-NEXT: movl -552(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -120(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %ecx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: addl -28(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill ; X32-NEXT: adcl -492(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -112(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx @@ -6510,11 +6508,11 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl -552(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -156(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb %cl ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload @@ -6524,135 +6522,135 @@ ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: addl -96(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -120(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -28(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl -324(%ebp), 
%edx # 4-byte Folded Reload -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -260(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -320(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -256(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -340(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -192(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -196(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl -16(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: adcl -268(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill +; X32-NEXT: adcl -264(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -156(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %edx ; X32-NEXT: adcl -364(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -396(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -124(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl -160(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -120(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -384(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -144(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -132(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -216(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -220(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -180(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl -156(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl -260(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -184(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -112(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl -288(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill -; X32-NEXT: addl -248(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill -; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill -; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill -; X32-NEXT: adcl -208(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -136(%ebp) # 4-byte Spill +; X32-NEXT: addl -252(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill +; X32-NEXT: adcl -248(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill +; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -56(%ebp) # 4-byte Spill +; X32-NEXT: adcl -216(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -132(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -320(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -316(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -300(%ebp), %ebx # 4-byte Folded Reload 
-; X32-NEXT: movl %ebx, -160(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ebx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -556(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -560(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -344(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill -; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -232(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -436(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -344(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -436(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -92(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -472(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl -156(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -88(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl -80(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -296(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -32(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -56(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -304(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -132(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -240(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl -132(%ebp), 
%eax # 4-byte Reload +; X32-NEXT: adcl -212(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill ; X32-NEXT: adcl -284(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl -160(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -140(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl -136(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -116(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl -16(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -432(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -168(%ebp) # 4-byte Spill -; X32-NEXT: adcl -456(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -308(%ebp) # 4-byte Spill -; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -252(%ebp) # 4-byte Spill +; X32-NEXT: addl -432(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -312(%ebp) # 4-byte Spill +; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -456(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -152(%ebp) # 4-byte Spill +; X32-NEXT: adcl -48(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -244(%ebp) # 4-byte Spill ; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -64(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -92(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -156(%ebp) # 4-byte Spill +; X32-NEXT: adcl -20(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -268(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl -276(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -272(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -276(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -240(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -172(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -124(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -20(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -168(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -120(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -76(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl 
-56(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -44(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -24(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -132(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -336(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -28(%ebp) # 4-byte Spill ; X32-NEXT: adcl -360(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -392(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -412(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill @@ -6685,36 +6683,36 @@ ; X32-NEXT: movl %esi, 48(%ecx) ; X32-NEXT: movl -540(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 52(%ecx) -; X32-NEXT: movl -228(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -232(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 56(%ecx) -; X32-NEXT: movl -196(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -200(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 60(%ecx) -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 64(%ecx) -; X32-NEXT: movl -308(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 68(%ecx) -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 72(%ecx) ; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 76(%ecx) -; X32-NEXT: movl -156(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 80(%ecx) -; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 84(%ecx) ; X32-NEXT: movl %ebx, 88(%ecx) -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 92(%ecx) ; X32-NEXT: movl %edi, 96(%ecx) -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 100(%ecx) -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 104(%ecx) -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 108(%ecx) ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 112(%ecx) ; X32-NEXT: movl %edx, 116(%ecx) -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 120(%ecx) ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 124(%ecx) @@ -6723,7 +6721,7 @@ ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; X64-LABEL: test_1024: ; X64: # BB#0: @@ -6764,17 +6762,16 @@ ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r11, %r12 -; X64-NEXT: movq %r11, %r8 -; X64-NEXT: addq %rax, %r12 -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: movq %r9, (%rsp) # 8-byte Spill -; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %rbp, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: 
adcq %rbx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %rax, %rcx +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: addq %rbp, %rcx +; X64-NEXT: movq %rcx, %r12 +; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rbx, %r14 +; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq (%rsi), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: xorl %ebp, %ebp @@ -6784,38 +6781,37 @@ ; X64-NEXT: movq 8(%rsi), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %rbp -; X64-NEXT: xorl %r11d, %r11d +; X64-NEXT: xorl %r9d, %r9d ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %rcx, %r15 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %rdi, %r15 ; X64-NEXT: adcq %rcx, %rbp -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%rsi), %rax ; X64-NEXT: movq %rsi, %r13 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdi, %r14 -; X64-NEXT: addq %rax, %r14 -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: addq %rbp, %r14 -; X64-NEXT: adcq %rbx, %r11 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, %r9 +; X64-NEXT: addq %rax, %r9 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: adcq %rdx, %rax +; X64-NEXT: addq %rbp, %r9 +; X64-NEXT: adcq %rbx, %rax +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movq %r9, %rax -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rcx, %r8 +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq (%r10), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: xorl %r8d, %r8d @@ -6823,44 +6819,44 @@ ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rcx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq 32(%r13), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %r8 -; X64-NEXT: xorl %r8d, %r8d +; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %rbx, %rcx +; X64-NEXT: movq %rbx, %r8 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: addq %r9, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %r15, %rax +; X64-NEXT: movq %r11, %rax +; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %r14, %r12 -; 
X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %r11, %rax +; X64-NEXT: adcq %r15, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r11, %rdi +; X64-NEXT: movq %r12, %rax +; X64-NEXT: adcq %r9, %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rbp, %r14 +; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbp, %rdi ; X64-NEXT: movq 8(%r10), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rsi, %r11 +; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: addq %rsi, %r12 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %rcx, %r11 +; X64-NEXT: addq %r8, %r12 ; X64-NEXT: adcq %rsi, %rbp ; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: setb %bl @@ -6869,92 +6865,91 @@ ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%r10), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: addq %rax, %r8 +; X64-NEXT: movq %r8, %rcx +; X64-NEXT: addq %rax, %rcx ; X64-NEXT: movq %rsi, %r10 ; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: addq %rbp, %r8 -; X64-NEXT: movq %r8, %rax +; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: adcq %rbx, %r10 -; X64-NEXT: movq %rcx, %rdx -; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: addq %r9, %rdx +; X64-NEXT: movq %r8, %rdx +; X64-NEXT: movq %r8, %r14 +; X64-NEXT: movq %r14, (%rsp) # 8-byte Spill +; X64-NEXT: addq %r11, %rdx ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r11, %r8 -; X64-NEXT: adcq %r8, %r15 +; X64-NEXT: movq %r12, %rsi +; X64-NEXT: adcq %rsi, %r15 ; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rax, %rcx +; X64-NEXT: adcq %rcx, %r9 +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, %r8 ; X64-NEXT: adcq %r10, %rdi ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq 40(%rsi), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: xorl %r14d, %r14d -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: addq %r9, %rdi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq 40(%rdi), %rax +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: xorl %r9d, %r9d +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: addq %r11, %rcx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %r13, %rdi -; X64-NEXT: adcq %r9, %rbp +; X64-NEXT: addq %r13, %rcx +; X64-NEXT: adcq %r11, %rbp ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp -; X64-NEXT: movzbl %bl, %r11d -; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: movq 48(%rsi), %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r14 +; 
X64-NEXT: movzbl %bl, %ebx +; X64-NEXT: adcq %rdx, %rbx +; X64-NEXT: movq 48(%rdi), %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r13, %rbx -; X64-NEXT: addq %rax, %rbx -; X64-NEXT: movq %r9, %rsi -; X64-NEXT: adcq %rdx, %rsi -; X64-NEXT: addq %rbp, %rbx -; X64-NEXT: adcq %r11, %rsi -; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: addq %r13, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rdi, %r8 +; X64-NEXT: movq %r13, %r12 +; X64-NEXT: addq %rax, %r12 +; X64-NEXT: movq %r11, %rdi +; X64-NEXT: adcq %rdx, %rdi +; X64-NEXT: addq %rbp, %r12 +; X64-NEXT: adcq %rbx, %rdi +; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r13, %r14 +; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rcx, %rsi +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r12, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rbx, %rcx -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rsi, %r10 +; X64-NEXT: adcq %rdi, %r10 ; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, %rax ; X64-NEXT: addq %r13, %rax -; X64-NEXT: movq (%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %r9, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: adcq %r11, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r8, %r10 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: movq 56(%rax), %r11 ; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r10 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx @@ -6969,19 +6964,19 @@ ; X64-NEXT: setb %cl ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: movq %rdi, %r13 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte 
Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %r12 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r10, %rbp +; X64-NEXT: movq %r9, %rbp ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -6993,65 +6988,63 @@ ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r10 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rcx, %rsi ; X64-NEXT: movzbl %bl, %eax ; X64-NEXT: adcq %rax, %r13 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload -; X64-NEXT: addq %r9, %rsi +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload +; X64-NEXT: addq %r14, %rsi ; X64-NEXT: adcq %r8, %r13 ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r12 -; X64-NEXT: movq %r10, %rbx +; X64-NEXT: movq %r11, %rbx ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload -; X64-NEXT: mulq %r11 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 +; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: movq 24(%rax), %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq 24(%rax), %r9 ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %rbx -; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbp, %r8 ; X64-NEXT: adcq %rdi, %rcx -; X64-NEXT: setb %dil -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rbx +; X64-NEXT: setb %bl +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %dil, %ecx +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: addq %r14, %rbp -; X64-NEXT: movq (%rsp), %rbx # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: adcq %r9, %rbx +; X64-NEXT: movq %r10, %rbp +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: addq %r11, %rbp +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: adcq %r10, %rbx ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: addq %rsi, %r10 -; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %rsi, %r14 +; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %r13, %r8 ; 
X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rbp @@ -7061,88 +7054,86 @@ ; X64-NEXT: setb %r15b ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r11, %rsi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload ; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %r11, %rdi +; X64-NEXT: addq %r14, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rdi, %r11 +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rdi, %r14 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %r8, %r12 +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %r9, %r12 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: addq %r14, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: adcq %r9, %r14 +; X64-NEXT: addq %r11, %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: adcq %r10, %r9 ; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: adcq %rdx, %r9 ; X64-NEXT: addq %rbp, %r13 -; X64-NEXT: adcq %rbx, %r11 +; X64-NEXT: adcq %rbx, %r14 ; X64-NEXT: movzbl %r15b, %eax ; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: adcq $0, %r14 +; X64-NEXT: adcq $0, %r9 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload -; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload ; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload +; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: movq 24(%rax), %rcx ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rbx, %rbp ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: mulq %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %rbp, %r15 ; X64-NEXT: adcq %rsi, %rbx ; X64-NEXT: setb %sil ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq 
%r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r8 ; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r11, %rbp +; X64-NEXT: movq %r9, %rbp ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -7154,8 +7145,8 @@ ; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -7163,7 +7154,7 @@ ; X64-NEXT: setb %cl ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: movq %rsi, %rbp -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rbx @@ -7175,11 +7166,11 @@ ; X64-NEXT: adcq %r15, %rsi ; X64-NEXT: adcq $0, %r8 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %rbp, %r14 ; X64-NEXT: mulq %rdi @@ -7188,11 +7179,11 @@ ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbp, %rax -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %dil ; X64-NEXT: movq %r14, %rax @@ -7200,7 +7191,7 @@ ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq (%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload ; X64-NEXT: addq %r13, %rdi ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload @@ -7208,65 +7199,63 @@ ; X64-NEXT: adcq %r14, %rbp ; X64-NEXT: addq %rax, %rdi ; X64-NEXT: adcq %rdx, %rbp -; X64-NEXT: addq %rbx, %r9 -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rsi, %r11 +; X64-NEXT: addq %rbx, %r11 ; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rsi, %r9 +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %r8, %rdi ; X64-NEXT: adcq %r10, %rbp -; X64-NEXT: setb %r9b +; X64-NEXT: setb %r10b ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r10, %rbx +; X64-NEXT: addq %r8, %rbx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: 
mulq %r12 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %rbx, %r15 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rbx, %r8 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %bl -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rcx -; X64-NEXT: addq %r13, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload -; X64-NEXT: movq %rbx, %rsi -; X64-NEXT: movq %rbx, %r12 -; X64-NEXT: adcq %r14, %rsi -; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq %r9, %r15 +; X64-NEXT: addq %r13, %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq %r12, %r13 +; X64-NEXT: adcq %r14, %r13 +; X64-NEXT: addq %rax, %r15 +; X64-NEXT: adcq %rdx, %r13 ; X64-NEXT: addq %rdi, %r11 -; X64-NEXT: adcq %rbp, %r15 -; X64-NEXT: movzbl %r9b, %eax -; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: adcq %rbp, %r8 +; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: adcq %rax, %r15 +; X64-NEXT: adcq $0, %r13 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload ; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: adcq $0, {{[0-9]+}}(%rsp) # 8-byte Folded Spill @@ -7277,104 +7266,106 @@ ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 -; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: movq %rcx, %rbp ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %rsi, %rcx ; X64-NEXT: adcq $0, %rbx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rbx, %rsi ; X64-NEXT: setb %cl -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r15 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq %r10, %r9 -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload +; X64-NEXT: addq {{[0-9]+}}(%rsp), 
%r9 # 8-byte Folded Reload ; X64-NEXT: movq %r12, %r10 ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r9 ; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: mulq %rbp +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r12, %rbx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rcx, %r12 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdi, %rbp +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: adcq %rax, %r15 +; X64-NEXT: adcq %rax, %rdi ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: addq %r14, %rbx -; X64-NEXT: adcq %r8, %r15 +; X64-NEXT: adcq %r8, %rdi ; X64-NEXT: adcq $0, %r9 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %rbp, %rsi -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %r12, %r11 +; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rbp, %r8 ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %r14, %rcx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: movq 56(%rax), %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq 56(%rax), %rsi +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rsi, %r11 +; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rcx, %r14 ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %cl ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload -; X64-NEXT: addq %r11, %rcx +; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: addq %r8, %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: adcq %r13, %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: adcq %r11, %rsi ; X64-NEXT: addq %rax, %rcx ; X64-NEXT: adcq %rdx, %rsi ; X64-NEXT: addq %rbx, %r12 -; X64-NEXT: adcq %r15, %r14 +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte 
Spill +; X64-NEXT: adcq %rdi, %r14 ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: addq %r9, %rcx @@ -7389,97 +7380,92 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r9, %rbx -; X64-NEXT: adcq $0, %r15 +; X64-NEXT: adcq $0, %r12 ; X64-NEXT: movq %rbp, %rax -; X64-NEXT: movq %r8, %rdi -; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rbx, %r8 -; X64-NEXT: adcq %r15, %r9 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %rbx, %rbp +; X64-NEXT: adcq %r12, %r9 ; X64-NEXT: setb %bl ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: addq %r9, %rax ; X64-NEXT: movzbl %bl, %edi ; X64-NEXT: adcq %rdi, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload -; X64-NEXT: addq %r11, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: adcq %r13, %rbp -; X64-NEXT: addq %rax, %r15 -; X64-NEXT: adcq %rdx, %rbp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: addq %r8, %r12 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: adcq %r11, %r10 +; X64-NEXT: addq %rax, %r12 +; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: adcq %rsi, %r8 +; X64-NEXT: adcq %rsi, %rbp ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload -; X64-NEXT: adcq %rax, %r15 -; X64-NEXT: adcq $0, %rbp +; X64-NEXT: adcq %rax, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: addq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: adcq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: adcq %r15, {{[0-9]+}}(%rsp) # 8-byte Folded Spill +; X64-NEXT: adcq %r13, %r14 ; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rdx -; X64-NEXT: adcq $0, %r8 -; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %rbp +; X64-NEXT: adcq $0, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: mulq 
%rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r10 +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r11, %rbx ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r12 -; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rbx, %r9 ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %bl -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: movq %rsi, %r13 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r8 -; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: movq %rcx, %r14 +; X64-NEXT: adcq %rdx, %r15 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r10, %rdi +; X64-NEXT: movq %r14, %rdi ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -7491,80 +7477,75 @@ ; X64-NEXT: addq %r11, %rbx ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r13 -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rcx, %r14 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %rcx, %rdi +; X64-NEXT: movq %rax, %r13 +; X64-NEXT: addq %rcx, %r13 ; X64-NEXT: movzbl %bl, %eax ; X64-NEXT: adcq %rax, %r11 -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: adcq %r12, %r11 +; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload +; X64-NEXT: adcq %r9, %r11 ; X64-NEXT: adcq $0, %r8 -; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq $0, %r14 -; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r13, %rbx -; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq $0, %r15 +; X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r14, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r12 +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rax, %r9 ; 
X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: movq %rsi, %r15 ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r10 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %r8, %rcx +; X64-NEXT: addq %rbx, %rcx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: mulq %r13 +; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rsi, %rbx ; X64-NEXT: setb %cl -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r13 -; X64-NEXT: movq %r13, %r9 +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: addq %r13, %rsi -; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: adcq %r14, %rcx -; X64-NEXT: addq %rax, %rsi +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: addq %rax, %r14 ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: addq %r13, %r9 +; X64-NEXT: movq %r9, %r13 ; X64-NEXT: adcq %r11, %r8 -; X64-NEXT: movq %r8, %r11 -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: movq %r8, %r15 +; X64-NEXT: adcq $0, %r14 ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %r10, %rsi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload ; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %rsi @@ -7573,66 +7554,65 @@ ; X64-NEXT: addq %rcx, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %rdi, %r10 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rdi, %r9 ; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %bl +; X64-NEXT: setb %sil ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %rbx ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %bl, %ecx +; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: addq %r13, %rsi +; X64-NEXT: addq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq %r14, %rcx +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r14 
# 8-byte Folded Reload -; X64-NEXT: adcq (%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: addq %r14, %r11 +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: addq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %rax, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq %r15, %r12 -; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rbp, %r11 -; X64-NEXT: movq %r11, (%rsp) # 8-byte Spill +; X64-NEXT: adcq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill +; X64-NEXT: adcq %r12, %r13 +; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r10, %r15 +; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload -; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rax, %r11 +; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq $0, %r9 +; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq 64(%r9), %r11 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq 64(%rcx), %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq 72(%rcx), %rsi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq 72(%r9), %rsi +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: movq %rdx, %rsi @@ -7640,10 +7620,10 @@ ; X64-NEXT: addq %rbx, %r8 ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %bl -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r10 -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, %r13 +; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %rsi, %rdi @@ -7654,142 +7634,139 @@ ; X64-NEXT: mulq %rdx ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload -; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: addq %rbx, %r10 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), 
%r15 # 8-byte Reload ; X64-NEXT: adcq %r14, %r15 -; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: addq %rdi, %r10 ; X64-NEXT: adcq %rcx, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq %r12, %rax ; X64-NEXT: movq %r11, %rsi ; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %r11, %rdi ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rbp, %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload +; X64-NEXT: adcq %r13, %r14 ; X64-NEXT: addq %rax, %rbx ; X64-NEXT: adcq %rdx, %r14 -; X64-NEXT: addq %r13, %rbx +; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq %r8, %r14 -; X64-NEXT: adcq $0, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: adcq $0, %r15 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq 80(%rbp), %rdi -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq 80(%r9), %rdi +; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %r8, %rcx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq 88(%rbp), %r10 -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq 88(%r9), %r9 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rsi, %rbp -; X64-NEXT: setb %r11b -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: setb %r12b +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rbp, %rsi -; X64-NEXT: movzbl %r11b, %eax +; X64-NEXT: movzbl %r12b, %eax ; X64-NEXT: adcq %rax, %rcx ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: mulq %rdx -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: addq %r9, %rbp +; X64-NEXT: movq %rdx, %r12 +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %rsi, %rbp -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: addq %rbx, %r13 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r11, %rax +; X64-NEXT: adcq %r12, %r13 +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: adcq %rcx, %r13 +; X64-NEXT: 
addq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: adcq %r14, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq $0, %rbp ; X64-NEXT: adcq $0, %rax -; X64-NEXT: addq %r12, %rbp -; X64-NEXT: movq %rbp, %r8 -; X64-NEXT: adcq %r15, %rax -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: adcq $0, %r13 +; X64-NEXT: addq %r10, %rax +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: adcq %r15, %r13 ; X64-NEXT: setb %r14b -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r15, %rbx +; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %sil -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: setb %bl +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %sil, %ecx +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: addq %r9, %rsi +; X64-NEXT: addq %r11, %rsi ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: adcq %r12, %rcx ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %r8, %r12 -; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %r11, %rbx -; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r8, %r15 +; X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r13, %rbp +; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movzbl %r14b, %eax ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: imulq %rax, %r10 -; X64-NEXT: movq %rax, %r14 +; X64-NEXT: imulq %rax, %r9 +; X64-NEXT: movq %rax, %r10 ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %r10, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: addq %r9, %rdx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload ; X64-NEXT: imulq %rbp, %rdi ; X64-NEXT: addq %rdx, %rdi ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload @@ -7800,7 +7777,7 @@ ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: imulq %rcx, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %r8, %r9 @@ -7808,11 +7785,11 @@ ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, %rdi -; X64-NEXT: mulq %r14 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r14 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, 
%rbx ; X64-NEXT: addq %rcx, %rbx @@ -7833,12 +7810,11 @@ ; X64-NEXT: adcq %rax, %r12 ; X64-NEXT: addq %r9, %r13 ; X64-NEXT: adcq %r8, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq 120(%rdx), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq 120(%rbp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload ; X64-NEXT: imulq %r10, %rcx -; X64-NEXT: movq 112(%rdx), %rsi -; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq 112(%rbp), %rsi ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r11 @@ -7854,7 +7830,7 @@ ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %r11, %r9 @@ -7893,49 +7869,48 @@ ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq 80(%rsi), %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq 80(%r9), %rsi +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq 88(%rsi), %rax -; X64-NEXT: movq %rsi, %r9 -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq 88(%r9), %r8 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r8, %rbx +; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rbx, %r14 ; X64-NEXT: adcq %rbp, %rcx -; X64-NEXT: setb %r8b -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: setb %r10b +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx -; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %rbp -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq (%rsp), %r12 # 8-byte Reload ; X64-NEXT: addq %r12, %rsi ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload @@ -7948,8 +7923,8 @@ ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq 72(%rdi), %r9 -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq 72(%rdi), %rdi +; 
X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx @@ -7959,11 +7934,10 @@ ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %r11b -; X64-NEXT: movq %r9, %rax -; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rbp @@ -7981,20 +7955,20 @@ ; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: adcq %rbx, %r8 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill ; X64-NEXT: adcq %r14, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp @@ -8007,74 +7981,72 @@ ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %dil -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %rbx ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: addq %r14, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: adcq %r13, %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: addq %r9, %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: adcq %r8, %r11 ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq (%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, (%rsp) # 8-byte Spill ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload ; X64-NEXT: movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r11 ; X64-NEXT: addq %rsi, %r15 ; X64-NEXT: adcq %r10, %r11 -; X64-NEXT: setb %r10b -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %r8, %rdi -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r12 +; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r14, %rsi +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r10, %rbx ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 
8-byte Reload -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: adcq %rdi, %rcx -; X64-NEXT: setb %r8b -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %rdi -; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %r8b, %ecx +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: adcq %rdi, %r10 +; X64-NEXT: setb %bl +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rcx, %rdi +; X64-NEXT: addq %r10, %rax +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: addq %r14, %rsi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: addq %r9, %rbx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq %r13, %rcx -; X64-NEXT: addq %rax, %rsi +; X64-NEXT: adcq %r8, %rcx +; X64-NEXT: addq %rax, %rbx ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %r15, %r9 -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %r11, %rbx -; X64-NEXT: movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movzbl %r10b, %eax -; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r15, %rbp +; X64-NEXT: movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r11, %r12 +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload +; X64-NEXT: adcq %rax, %rbx +; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload ; X64-NEXT: movq 96(%rbp), %rcx ; X64-NEXT: imulq %rcx, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r12, %rsi ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rdi, %rdx @@ -8100,7 +8072,7 @@ ; X64-NEXT: movq %rbx, %rsi ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %rbp, %r9 ; X64-NEXT: mulq %rcx @@ -8125,32 +8097,31 @@ ; X64-NEXT: addq %r10, %rbp ; X64-NEXT: adcq %rdi, %rbx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: imulq %rax, %rsi -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rax, %r8 +; X64-NEXT: imulq %r13, %rsi +; X64-NEXT: movq %r13, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload -; X64-NEXT: imulq %r11, %rcx -; X64-NEXT: addq %rdx, %rcx -; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: imulq %r11, %r8 +; X64-NEXT: addq %rdx, %r8 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload ; X64-NEXT: imulq %r15, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: mulq %r14 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r10 ; X64-NEXT: addq 
%rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: imulq %r14, %rax +; X64-NEXT: movq %r14, %rax +; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: addq %rdx, %rax -; X64-NEXT: addq %r8, %r10 -; X64-NEXT: adcq %r9, %rax +; X64-NEXT: addq %r9, %r10 +; X64-NEXT: adcq %r8, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %r14 ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r8 @@ -8174,7 +8145,7 @@ ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq %r10, %rax ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload +; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq %r12, %rsi ; X64-NEXT: adcq %rbp, %rax ; X64-NEXT: adcq %rbx, %rdx @@ -8184,23 +8155,23 @@ ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: movq (%rsp), %rbp # 8-byte Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload ; X64-NEXT: movq %rcx, %r9 ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, %r10 -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload -; X64-NEXT: adcq (%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload @@ -8236,7 +8207,7 @@ ; X64-NEXT: popq %r14 ; X64-NEXT: popq %r15 ; X64-NEXT: popq %rbp -; X64-NEXT: retq +; X64-NEXT: ret{{[l|q]}} %av = load i1024, i1024* %a %bv = load i1024, i1024* %b %r = mul i1024 %av, %bv Index: test/CodeGen/X86/mul-i256.ll =================================================================== --- test/CodeGen/X86/mul-i256.ll +++ test/CodeGen/X86/mul-i256.ll @@ -125,11 +125,10 @@ ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl 8(%ecx), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 8(%edi), %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, %edi +; X32-NEXT: movl %esi, %ecx ; X32-NEXT: mull %ebx ; X32-NEXT: movl 
%edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill @@ -139,9 +138,10 @@ ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl 12(%ecx), %ecx -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl 12(%edi), %edi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %ecx ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %esi, %eax @@ -353,7 +353,7 @@ ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; X64-LABEL: test: ; X64: # BB#0: # %entry @@ -423,7 +423,7 @@ ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r14 ; X64-NEXT: popq %r15 -; X64-NEXT: retq +; X64-NEXT: ret{{[l|q]}} entry: %av = load i256, i256* %a %bv = load i256, i256* %b Index: test/CodeGen/X86/mul-i512.ll =================================================================== --- test/CodeGen/X86/mul-i512.ll +++ test/CodeGen/X86/mul-i512.ll @@ -28,7 +28,7 @@ ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl %esi, %edi ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %ebx ; X32-NEXT: movl %ecx, %edi ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill @@ -65,7 +65,7 @@ ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %ebp, %ebx -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx @@ -121,8 +121,8 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload @@ -164,7 +164,7 @@ ; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: adcl %esi, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -214,7 +214,7 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edx, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill @@ -224,7 +224,7 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 28(%eax), %esi @@ -287,7 +287,7 @@ ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: addl 
{{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill @@ -304,7 +304,7 @@ ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 12(%eax), %ecx -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx @@ -313,7 +313,7 @@ ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: mull (%esp) # 4-byte Folded Reload +; X32-NEXT: mull {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx @@ -343,7 +343,7 @@ ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax @@ -459,7 +459,7 @@ ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax @@ -498,7 +498,7 @@ ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %edi, %eax @@ -524,7 +524,7 @@ ; X32-NEXT: adcl %eax, %ebp ; X32-NEXT: adcl $0, %edi ; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload @@ -544,7 +544,7 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx @@ -602,7 +602,7 @@ ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill @@ -611,19 +611,19 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 
4-byte Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 28(%eax), %ebp ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ebp -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill @@ -661,7 +661,7 @@ ; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill @@ -793,7 +793,7 @@ ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax @@ -834,7 +834,7 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl (%esp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebp @@ -887,7 +887,7 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -948,7 +948,6 @@ ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, %esi ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload @@ -967,19 +966,19 @@ ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 40(%eax), %ebp -; X32-NEXT: movl %eax, %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl 44(%ebx), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 44(%eax), %ebx ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx @@ -1023,7 +1022,7 @@ ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi @@ -1075,14 +1074,14 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: imull %eax, %esi ; X32-NEXT: addl %edx, 
%esi ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %edi ; X32-NEXT: adcl %ebp, %esi -; X32-NEXT: movl %esi, %edi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp @@ -1106,16 +1105,15 @@ ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl %edi, %edx +; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl 60(%edx), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 60(%edi), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl 56(%edx), %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl 56(%edi), %esi ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx @@ -1129,13 +1127,13 @@ ; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: mull %ebx ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx @@ -1161,7 +1159,7 @@ ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload @@ -1182,7 +1180,7 @@ ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl 44(%ecx), %ecx ; X32-NEXT: movl %ecx, %eax @@ -1217,7 +1215,7 @@ ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl 32(%esi), %edi ; X32-NEXT: movl %edi, %eax @@ -1261,17 +1259,17 @@ ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl %ebp, %eax -; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte 
Folded Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi @@ -1300,28 +1298,28 @@ ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, (%esp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx @@ -1329,11 +1327,11 @@ ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb (%esp) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload @@ -1471,7 +1469,7 @@ ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload @@ -1487,7 +1485,7 @@ ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload @@ -1515,7 +1513,7 @@ ; X32-NEXT: movl %edi, 32(%ecx) ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 36(%ecx) -; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: movl 
{{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 40(%ecx) ; X32-NEXT: movl %esi, 44(%ecx) ; X32-NEXT: movl %edx, 48(%ecx) @@ -1527,7 +1525,7 @@ ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X32-NEXT: ret{{[l|q]}} ; ; X64-LABEL: test_512: ; X64: # BB#0: @@ -1683,14 +1681,14 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r15, %rbx +; X64-NEXT: addq %r10, %rbx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rdi @@ -1699,7 +1697,7 @@ ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx @@ -1771,12 +1769,11 @@ ; X64-NEXT: adcq %rax, %r11 ; X64-NEXT: addq %r14, %r9 ; X64-NEXT: adcq %rbx, %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq 56(%rdx), %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq 56(%rbp), %rcx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload ; X64-NEXT: imulq %r10, %rcx -; X64-NEXT: movq 48(%rdx), %rbx -; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq 48(%rbp), %rbx ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rax, %rsi @@ -1850,7 +1847,7 @@ ; X64-NEXT: popq %r14 ; X64-NEXT: popq %r15 ; X64-NEXT: popq %rbp -; X64-NEXT: retq +; X64-NEXT: ret{{[l|q]}} %av = load i512, i512* %a %bv = load i512, i512* %b %r = mul i512 %av, %bv Index: test/CodeGen/X86/mul128.ll =================================================================== --- test/CodeGen/X86/mul128.ll +++ test/CodeGen/X86/mul128.ll @@ -6,8 +6,8 @@ ; X64-LABEL: foo: ; X64: # BB#0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: imulq %rdi, %rcx ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: imulq %rax, %rcx ; X64-NEXT: mulq %r8 ; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: imulq %r8, %rsi Index: test/CodeGen/X86/mul64.ll =================================================================== --- test/CodeGen/X86/mul64.ll +++ test/CodeGen/X86/mul64.ll @@ -19,8 +19,8 @@ ; ; X64-LABEL: foo: ; X64: # BB#0: -; X64-NEXT: imulq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: imulq %rsi, %rax ; X64-NEXT: retq %k = mul i64 %t, %u ret i64 %k Index: test/CodeGen/X86/mwaitx.ll =================================================================== --- test/CodeGen/X86/mwaitx.ll +++ test/CodeGen/X86/mwaitx.ll @@ -4,8 +4,8 @@ ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=bdver4 | FileCheck %s -check-prefix=WIN64 ; CHECK-LABEL: foo: -; CHECK: leaq (%rdi), %rax -; CHECK-NEXT: movl %esi, %ecx +; CHECK: movl %esi, %ecx +; CHECK-NEXT: leaq (%rdi), %rax ; CHECK-NEXT: monitorx ; WIN64-LABEL: foo: ; WIN64: leaq (%rcx), %rax @@ -21,13 +21,13 @@ declare void @llvm.x86.monitorx(i8*, i32, i32) nounwind ; CHECK-LABEL: bar: -; CHECK: movl %edi, %ecx +; CHECK: movl %edx, %ebx ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: movl %edx, %ebx +; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: mwaitx ; WIN64-LABEL: bar: -; WIN64: movl %edx, %eax ; WIN64: movl %r8d, %ebx +; WIN64: movl %edx, %eax ; WIN64-NEXT: mwaitx define void @bar(i32 %E, i32 %H, i32 %C) 
nounwind { entry: Index: test/CodeGen/X86/negate-i1.ll =================================================================== --- test/CodeGen/X86/negate-i1.ll +++ test/CodeGen/X86/negate-i1.ll @@ -5,9 +5,10 @@ define i8 @select_i8_neg1_or_0(i1 %a) { ; X64-LABEL: select_i8_neg1_or_0: ; X64: # BB#0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: negb %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ; ; X32-LABEL: select_i8_neg1_or_0: @@ -23,8 +24,9 @@ define i8 @select_i8_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i8_neg1_or_0_zeroext: ; X64: # BB#0: -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negb %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ; ; X32-LABEL: select_i8_neg1_or_0_zeroext: @@ -39,9 +41,10 @@ define i16 @select_i16_neg1_or_0(i1 %a) { ; X64-LABEL: select_i16_neg1_or_0: ; X64: # BB#0: -; X64-NEXT: andl $1, %edi -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negl %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq ; ; X32-LABEL: select_i16_neg1_or_0: @@ -58,8 +61,9 @@ define i16 @select_i16_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i16_neg1_or_0_zeroext: ; X64: # BB#0: -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq ; ; X32-LABEL: select_i16_neg1_or_0_zeroext: @@ -75,9 +79,9 @@ define i32 @select_i32_neg1_or_0(i1 %a) { ; X64-LABEL: select_i32_neg1_or_0: ; X64: # BB#0: -; X64-NEXT: andl $1, %edi -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq ; ; X32-LABEL: select_i32_neg1_or_0: @@ -93,8 +97,8 @@ define i32 @select_i32_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i32_neg1_or_0_zeroext: ; X64: # BB#0: -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq ; ; X32-LABEL: select_i32_neg1_or_0_zeroext: @@ -109,10 +113,9 @@ define i64 @select_i64_neg1_or_0(i1 %a) { ; X64-LABEL: select_i64_neg1_or_0: ; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: andl $1, %edi -; X64-NEXT: negq %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negq %rax ; X64-NEXT: retq ; ; X32-LABEL: select_i64_neg1_or_0: Index: test/CodeGen/X86/negate-shift.ll =================================================================== --- test/CodeGen/X86/negate-shift.ll +++ test/CodeGen/X86/negate-shift.ll @@ -4,8 +4,8 @@ define i32 @neg_lshr_signbit(i32 %x) { ; X64-LABEL: neg_lshr_signbit: ; X64: # BB#0: -; X64-NEXT: sarl $31, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarl $31, %eax ; X64-NEXT: retq %sh = lshr i32 %x, 31 %neg = sub i32 0, %sh @@ -15,8 +15,8 @@ define i64 @neg_ashr_signbit(i64 %x) { ; X64-LABEL: neg_ashr_signbit: ; X64: # BB#0: -; X64-NEXT: shrq $63, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $63, %rax ; X64-NEXT: retq %sh = ashr i64 %x, 63 %neg = sub i64 0, %sh Index: test/CodeGen/X86/negate.ll =================================================================== --- test/CodeGen/X86/negate.ll +++ test/CodeGen/X86/negate.ll @@ -42,8 +42,9 @@ define i8 @negate_zero_or_minsigned(i8 %x) { ; CHECK-LABEL: negate_zero_or_minsigned: ; CHECK: # BB#0: -; CHECK-NEXT: shlb $7, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $7, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %signbit = shl i8 %x, 7 %neg = sub i8 0, %signbit Index: test/CodeGen/X86/no-sse2-avg.ll 
=================================================================== --- test/CodeGen/X86/no-sse2-avg.ll +++ test/CodeGen/X86/no-sse2-avg.ll @@ -5,9 +5,9 @@ define <16 x i8> @PR27973() { ; CHECK-LABEL: PR27973: ; CHECK: # BB#0: -; CHECK-NEXT: movq $0, 8(%rdi) -; CHECK-NEXT: movq $0, (%rdi) ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq $0, 8(%rax) +; CHECK-NEXT: movq $0, (%rax) ; CHECK-NEXT: retq %t0 = zext <16 x i8> zeroinitializer to <16 x i32> %t1 = add nuw nsw <16 x i32> %t0, Index: test/CodeGen/X86/not-and-simplify.ll =================================================================== --- test/CodeGen/X86/not-and-simplify.ll +++ test/CodeGen/X86/not-and-simplify.ll @@ -7,9 +7,9 @@ define i32 @shrink_xor_constant1(i32 %x) { ; ALL-LABEL: shrink_xor_constant1: ; ALL: # BB#0: -; ALL-NEXT: shrl $31, %edi -; ALL-NEXT: xorl $1, %edi ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: shrl $31, %eax +; ALL-NEXT: xorl $1, %eax ; ALL-NEXT: retq %sh = lshr i32 %x, 31 %not = xor i32 %sh, -1 @@ -34,9 +34,10 @@ define i8 @shrink_xor_constant2(i8 %x) { ; ALL-LABEL: shrink_xor_constant2: ; ALL: # BB#0: -; ALL-NEXT: shlb $5, %dil -; ALL-NEXT: xorb $-32, %dil ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: shlb $5, %al +; ALL-NEXT: xorb $-32, %al +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq %sh = shl i8 %x, 5 %not = xor i8 %sh, -1 Index: test/CodeGen/X86/palignr.ll =================================================================== --- test/CodeGen/X86/palignr.ll +++ test/CodeGen/X86/palignr.ll @@ -167,16 +167,15 @@ ; CHECK-SSE2-LABEL: test9: ; CHECK-SSE2: # BB#0: ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 -; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero -; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; CHECK-SSE2-NEXT: por %xmm0, %xmm1 -; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 +; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero +; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] +; CHECK-SSE2-NEXT: por %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retl ; ; CHECK-SSSE3-LABEL: test9: ; CHECK-SSSE3: # BB#0: -; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] ; CHECK-SSSE3-NEXT: movdqa %xmm1, %xmm0 +; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] ; CHECK-SSSE3-NEXT: retl ; ; CHECK-AVX-LABEL: test9: Index: test/CodeGen/X86/peep-setb.ll =================================================================== --- test/CodeGen/X86/peep-setb.ll +++ test/CodeGen/X86/peep-setb.ll @@ -7,9 +7,10 @@ define i8 @test1(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test1: ; CHECK: # BB#0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: adcb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: adcb $0, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = zext i1 %cmp to i8 @@ -20,9 +21,9 @@ define i32 @test2(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test2: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = zext i1 %cmp to i32 @@ -33,9 +34,9 @@ define i64 @test3(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test3: ; CHECK: # BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: adcq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; 
CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: adcq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = zext i1 %cmp to i64 @@ -46,9 +47,10 @@ define i8 @test4(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test4: ; CHECK: # BB#0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: sbbb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: sbbb $0, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = zext i1 %cmp to i8 @@ -59,9 +61,9 @@ define i32 @test5(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test5: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = zext i1 %cmp to i32 @@ -72,9 +74,9 @@ define i64 @test6(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test6: ; CHECK: # BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: sbbq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: sbbq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = zext i1 %cmp to i64 @@ -85,9 +87,10 @@ define i8 @test7(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test7: ; CHECK: # BB#0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: adcb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: adcb $0, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = sext i1 %cmp to i8 @@ -98,9 +101,9 @@ define i32 @test8(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test8: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = sext i1 %cmp to i32 @@ -111,9 +114,9 @@ define i64 @test9(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test9: ; CHECK: # BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: adcq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: adcq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = sext i1 %cmp to i64 Index: test/CodeGen/X86/pku.ll =================================================================== --- test/CodeGen/X86/pku.ll +++ test/CodeGen/X86/pku.ll @@ -5,9 +5,9 @@ define void @test_x86_wrpkru(i32 %src) { ; CHECK-LABEL: test_x86_wrpkru: ; CHECK: ## BB#0: +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: wrpkru ; CHECK-NEXT: retq call void @llvm.x86.wrpkru(i32 %src) Index: test/CodeGen/X86/pr12312.ll =================================================================== --- test/CodeGen/X86/pr12312.ll +++ test/CodeGen/X86/pr12312.ll @@ -177,16 +177,16 @@ define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel128: ; SSE41: # BB#0: -; SSE41-NEXT: ptest %xmm0, %xmm0 -; SSE41-NEXT: cmovel %esi, %edi ; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: ptest %xmm0, %xmm0 +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: vecsel128: ; AVX: # BB#0: -; AVX-NEXT: vptest %xmm0, %xmm0 -; AVX-NEXT: cmovel %esi, %edi ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: vptest %xmm0, %xmm0 +; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: retq %t0 = bitcast <4 x i32> %input to i128 %t1 = icmp ne i128 %t0, 0 @@ -197,17 +197,17 @@ define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel256: ; SSE41: # BB#0: +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: por %xmm1, %xmm0 ; 
SSE41-NEXT: ptest %xmm0, %xmm0 -; SSE41-NEXT: cmovel %esi, %edi -; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: vecsel256: ; AVX: # BB#0: -; AVX-NEXT: vptest %ymm0, %ymm0 -; AVX-NEXT: cmovel %esi, %edi ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: vptest %ymm0, %ymm0 +; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq %t0 = bitcast <8 x i32> %input to i256 @@ -219,20 +219,20 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel512: ; SSE41: # BB#0: +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: por %xmm3, %xmm1 ; SSE41-NEXT: por %xmm2, %xmm1 ; SSE41-NEXT: por %xmm0, %xmm1 ; SSE41-NEXT: ptest %xmm1, %xmm1 -; SSE41-NEXT: cmovel %esi, %edi -; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: vecsel512: ; AVX: # BB#0: +; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vptest %ymm0, %ymm0 -; AVX-NEXT: cmovel %esi, %edi -; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq %t0 = bitcast <16 x i32> %input to i512 Index: test/CodeGen/X86/pr15705.ll =================================================================== --- test/CodeGen/X86/pr15705.ll +++ test/CodeGen/X86/pr15705.ll @@ -22,14 +22,14 @@ ; ; X64-LABEL: PR15705: ; X64: # BB#0: # %entry +; X64-NEXT: movl %edx, %eax ; X64-NEXT: cmpl %esi, %edi ; X64-NEXT: je .LBB0_2 ; X64-NEXT: # BB#1: # %if.end -; X64-NEXT: cmpl %edx, %edi +; X64-NEXT: cmpl %eax, %edi ; X64-NEXT: cmovel %ecx, %esi -; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %esi, %eax ; X64-NEXT: .LBB0_2: # %return -; X64-NEXT: movl %edx, %eax ; X64-NEXT: retq entry: %cmp = icmp eq i32 %x, %a Index: test/CodeGen/X86/pr15981.ll =================================================================== --- test/CodeGen/X86/pr15981.ll +++ test/CodeGen/X86/pr15981.ll @@ -19,9 +19,9 @@ ; ; X64-LABEL: fn1: ; X64: # BB#0: -; X64-NEXT: testl %esi, %esi -; X64-NEXT: cmovel %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: testl %esi, %esi +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %3 = icmp ne i32 %1, 0 %4 = select i1 %3, i32 %0, i32 0 Index: test/CodeGen/X86/pr23664.ll =================================================================== --- test/CodeGen/X86/pr23664.ll +++ test/CodeGen/X86/pr23664.ll @@ -7,8 +7,9 @@ ret i2 %or ; CHECK-LABEL: f: -; CHECK: addb %dil, %dil -; CHECK-NEXT: orb $1, %dil -; CHECK-NEXT: movl %edi, %eax +; CHECK: movl %edi, %eax +; CHECK-NEXT: addb %al, %al +; CHECK-NEXT: orb $1, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq } Index: test/CodeGen/X86/pr28173.ll =================================================================== --- test/CodeGen/X86/pr28173.ll +++ test/CodeGen/X86/pr28173.ll @@ -78,8 +78,9 @@ define i8 @foo8(i1 zeroext %i) #0 { ; CHECK-LABEL: foo8: ; CHECK: # BB#0: -; CHECK-NEXT: orb $-2, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orb $-2, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq br label %bb Index: test/CodeGen/X86/pr34657.ll =================================================================== --- test/CodeGen/X86/pr34657.ll +++ test/CodeGen/X86/pr34657.ll @@ -1,17 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw -o - | FileCheck %s +; RUN: llc %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw -o - | FileCheck %s define <112 x i8> @pr34657() local_unnamed_addr { -; CHECK-LABEL: 
pr34657 +; CHECK-LABEL: pr34657: ; CHECK: # BB#0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: vmovups (%rax), %xmm0 ; CHECK-NEXT: vmovups (%rax), %ymm1 ; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 ; CHECK-NEXT: vmovups (%rax), %zmm2 -; CHECK-NEXT: vmovaps %ymm1, 64(%rdi) -; CHECK-NEXT: vmovaps %zmm2, (%rdi) -; CHECK-NEXT: vextractf32x4 $2, %zmm0, 96(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: vmovaps %ymm1, 64(%rax) +; CHECK-NEXT: vmovaps %zmm2, (%rax) +; CHECK-NEXT: vextractf32x4 $2, %zmm0, 96(%rax) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq entry: Index: test/CodeGen/X86/replace-load-and-with-bzhi.ll =================================================================== --- test/CodeGen/X86/replace-load-and-with-bzhi.ll +++ test/CodeGen/X86/replace-load-and-with-bzhi.ll @@ -10,9 +10,9 @@ define i32 @f32_bzhi(i32 %x, i32 %y) local_unnamed_addr { ; CHECK-LABEL: f32_bzhi: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movslq %esi, %rax -; CHECK-NEXT: andl fill_table32(,%rax,4), %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movslq %esi, %rcx +; CHECK-NEXT: andl fill_table32(,%rcx,4), %eax ; CHECK-NEXT: ret{{[l|q]}} ; ; CHECK32-LABEL: f32_bzhi: @@ -32,9 +32,9 @@ define i32 @f32_bzhi_partial(i32 %x, i32 %y) local_unnamed_addr { ; CHECK-LABEL: f32_bzhi_partial: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movslq %esi, %rax -; CHECK-NEXT: andl fill_table32_partial(,%rax,4), %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movslq %esi, %rcx +; CHECK-NEXT: andl fill_table32_partial(,%rcx,4), %eax ; CHECK-NEXT: ret{{[l|q]}} ; ; CHECK32-LABEL: f32_bzhi_partial: @@ -54,8 +54,8 @@ define i64 @f64_bzhi(i64 %x, i64 %y) local_unnamed_addr { ; CHECK-LABEL: f64_bzhi: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: andq fill_table64(,%rsi,8), %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: andq fill_table64(,%rsi,8), %rax ; CHECK-NEXT: ret{{[l|q]}} ; ; CHECK32-LABEL: f64_bzhi: @@ -76,8 +76,8 @@ define i64 @f64_bzhi_partial(i64 %x, i64 %y) local_unnamed_addr { ; CHECK-LABEL: f64_bzhi_partial: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: andq fill_table64_partial(,%rsi,8), %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: andq fill_table64_partial(,%rsi,8), %rax ; CHECK-NEXT: ret{{[l|q]}} ; ; CHECK32-LABEL: f64_bzhi_partial: Index: test/CodeGen/X86/rot16.ll =================================================================== --- test/CodeGen/X86/rot16.ll +++ test/CodeGen/X86/rot16.ll @@ -13,8 +13,10 @@ ; X64-LABEL: foo: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %di, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shldw %cl, %ax, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = shl i16 %x, %z %t1 = sub i16 16, %z @@ -35,8 +37,10 @@ ; X64-LABEL: bar: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shldw %cl, %di, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = shl i16 %y, %z %t1 = sub i16 16, %z @@ -56,8 +60,10 @@ ; X64-LABEL: un: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %di, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrdw %cl, %ax, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = lshr i16 %x, %z %t1 = sub i16 16, %z @@ -78,8 +84,10 @@ ; X64-LABEL: bu: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrdw %cl, %di, %ax +; X64-NEXT: # kill: %AX %AX 
%EAX ; X64-NEXT: retq %t0 = lshr i16 %y, %z %t1 = sub i16 16, %z @@ -97,8 +105,9 @@ ; ; X64-LABEL: xfoo: ; X64: # BB#0: -; X64-NEXT: rolw $5, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $5, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = lshr i16 %x, 11 %t1 = shl i16 %x, 5 @@ -116,8 +125,9 @@ ; ; X64-LABEL: xbar: ; X64: # BB#0: -; X64-NEXT: shldw $5, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: shldw $5, %di, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = shl i16 %y, 5 %t1 = lshr i16 %x, 11 @@ -134,8 +144,9 @@ ; ; X64-LABEL: xun: ; X64: # BB#0: -; X64-NEXT: rolw $11, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $11, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = lshr i16 %x, 5 %t1 = shl i16 %x, 11 @@ -153,8 +164,9 @@ ; ; X64-LABEL: xbu: ; X64: # BB#0: -; X64-NEXT: shldw $11, %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shldw $11, %si, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = lshr i16 %y, 5 %t1 = shl i16 %x, 11 Index: test/CodeGen/X86/rot64.ll =================================================================== --- test/CodeGen/X86/rot64.ll +++ test/CodeGen/X86/rot64.ll @@ -6,9 +6,10 @@ define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: foo: ; ALL: # BB#0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: rolq %cl, %rdi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: # kill: %CL %CL %RCX +; ALL-NEXT: rolq %cl, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %x, %z @@ -21,9 +22,10 @@ define i64 @bar(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: bar: ; ALL: # BB#0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: shldq %cl, %rdi, %rsi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: # kill: %CL %CL %RCX +; ALL-NEXT: shldq %cl, %rdi, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %y, %z @@ -36,9 +38,10 @@ define i64 @un(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: un: ; ALL: # BB#0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: rorq %cl, %rdi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: # kill: %CL %CL %RCX +; ALL-NEXT: rorq %cl, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %x, %z @@ -51,9 +54,10 @@ define i64 @bu(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: bu: ; ALL: # BB#0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: shrdq %cl, %rdi, %rsi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: # kill: %CL %CL %RCX +; ALL-NEXT: shrdq %cl, %rdi, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %y, %z @@ -66,14 +70,14 @@ define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone { ; X64-LABEL: xfoo: ; X64: # BB#0: # %entry -; X64-NEXT: rolq $7, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $7, %rax ; X64-NEXT: retq ; ; SHLD-LABEL: xfoo: ; SHLD: # BB#0: # %entry -; SHLD-NEXT: shldq $7, %rdi, %rdi ; SHLD-NEXT: movq %rdi, %rax +; SHLD-NEXT: shldq $7, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: xfoo: @@ -115,8 +119,8 @@ define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: xbar: ; ALL: # BB#0: # %entry -; ALL-NEXT: shrdq $57, %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: shrdq $57, %rsi, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %y, 7 @@ -128,14 +132,14 @@ define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone { ; X64-LABEL: xun: ; X64: # BB#0: # %entry -; X64-NEXT: rolq $57, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $57, %rax ; X64-NEXT: retq ; ; SHLD-LABEL: xun: ; SHLD: # BB#0: # %entry -; SHLD-NEXT: shldq $57, %rdi, %rdi ; SHLD-NEXT: movq 
%rdi, %rax +; SHLD-NEXT: shldq $57, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: xun: @@ -177,8 +181,8 @@ define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: xbu: ; ALL: # BB#0: # %entry -; ALL-NEXT: shldq $57, %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: shldq $57, %rsi, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %y, 7 Index: test/CodeGen/X86/rotate.ll =================================================================== --- test/CodeGen/X86/rotate.ll +++ test/CodeGen/X86/rotate.ll @@ -43,8 +43,9 @@ ; 64-LABEL: rotl64: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rolq %cl, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rolq %cl, %rax ; 64-NEXT: retq %shift.upgrd.1 = zext i8 %Amt to i64 %B = shl i64 %A, %shift.upgrd.1 @@ -96,8 +97,9 @@ ; 64-LABEL: rotr64: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorq %cl, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rorq %cl, %rax ; 64-NEXT: retq %shift.upgrd.3 = zext i8 %Amt to i64 %B = lshr i64 %A, %shift.upgrd.3 @@ -120,8 +122,8 @@ ; ; 64-LABEL: rotli64: ; 64: # BB#0: -; 64-NEXT: rolq $5, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rolq $5, %rax ; 64-NEXT: retq %B = shl i64 %A, 5 %C = lshr i64 %A, 59 @@ -141,8 +143,8 @@ ; ; 64-LABEL: rotri64: ; 64: # BB#0: -; 64-NEXT: rolq $59, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rolq $59, %rax ; 64-NEXT: retq %B = lshr i64 %A, 5 %C = shl i64 %A, 59 @@ -162,8 +164,8 @@ ; ; 64-LABEL: rotl1_64: ; 64: # BB#0: -; 64-NEXT: rolq %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rolq %rax ; 64-NEXT: retq %B = shl i64 %A, 1 %C = lshr i64 %A, 63 @@ -183,8 +185,8 @@ ; ; 64-LABEL: rotr1_64: ; 64: # BB#0: -; 64-NEXT: rorq %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rorq %rax ; 64-NEXT: retq %B = shl i64 %A, 63 %C = lshr i64 %A, 1 @@ -203,8 +205,9 @@ ; 64-LABEL: rotl32: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: roll %cl, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: roll %cl, %eax ; 64-NEXT: retq %shift.upgrd.1 = zext i8 %Amt to i32 %B = shl i32 %A, %shift.upgrd.1 @@ -226,8 +229,9 @@ ; 64-LABEL: rotr32: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorl %cl, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rorl %cl, %eax ; 64-NEXT: retq %shift.upgrd.3 = zext i8 %Amt to i32 %B = lshr i32 %A, %shift.upgrd.3 @@ -247,8 +251,8 @@ ; ; 64-LABEL: rotli32: ; 64: # BB#0: -; 64-NEXT: roll $5, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: roll $5, %eax ; 64-NEXT: retq %B = shl i32 %A, 5 %C = lshr i32 %A, 27 @@ -265,8 +269,8 @@ ; ; 64-LABEL: rotri32: ; 64: # BB#0: -; 64-NEXT: roll $27, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: roll $27, %eax ; 64-NEXT: retq %B = lshr i32 %A, 5 %C = shl i32 %A, 27 @@ -283,8 +287,8 @@ ; ; 64-LABEL: rotl1_32: ; 64: # BB#0: -; 64-NEXT: roll %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: roll %eax ; 64-NEXT: retq %B = shl i32 %A, 1 %C = lshr i32 %A, 31 @@ -301,8 +305,8 @@ ; ; 64-LABEL: rotr1_32: ; 64: # BB#0: -; 64-NEXT: rorl %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rorl %eax ; 64-NEXT: retq %B = shl i32 %A, 31 %C = lshr i32 %A, 1 @@ -321,8 +325,10 @@ ; 64-LABEL: rotl16: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rolw %cl, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rolw %cl, %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %shift.upgrd.5 = zext i8 %Amt to i16 %B = shl i16 %A, %shift.upgrd.5 @@ -344,8 +350,10 @@ ; 64-LABEL: rotr16: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorw 
%cl, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rorw %cl, %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %shift.upgrd.7 = zext i8 %Amt to i16 %B = lshr i16 %A, %shift.upgrd.7 @@ -365,8 +373,9 @@ ; ; 64-LABEL: rotli16: ; 64: # BB#0: -; 64-NEXT: rolw $5, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolw $5, %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %B = shl i16 %A, 5 %C = lshr i16 %A, 11 @@ -383,8 +392,9 @@ ; ; 64-LABEL: rotri16: ; 64: # BB#0: -; 64-NEXT: rolw $11, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolw $11, %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %B = lshr i16 %A, 5 %C = shl i16 %A, 11 @@ -401,8 +411,9 @@ ; ; 64-LABEL: rotl1_16: ; 64: # BB#0: -; 64-NEXT: rolw %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolw %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %B = shl i16 %A, 1 %C = lshr i16 %A, 15 @@ -419,8 +430,9 @@ ; ; 64-LABEL: rotr1_16: ; 64: # BB#0: -; 64-NEXT: rorw %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rorw %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %B = lshr i16 %A, 1 %C = shl i16 %A, 15 @@ -439,8 +451,10 @@ ; 64-LABEL: rotl8: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rolb %cl, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rolb %cl, %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = shl i8 %A, %Amt %Amt2 = sub i8 8, %Amt @@ -460,8 +474,10 @@ ; 64-LABEL: rotr8: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorb %cl, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rorb %cl, %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = lshr i8 %A, %Amt %Amt2 = sub i8 8, %Amt @@ -479,8 +495,9 @@ ; ; 64-LABEL: rotli8: ; 64: # BB#0: -; 64-NEXT: rolb $5, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolb $5, %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = shl i8 %A, 5 %C = lshr i8 %A, 3 @@ -497,8 +514,9 @@ ; ; 64-LABEL: rotri8: ; 64: # BB#0: -; 64-NEXT: rolb $3, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolb $3, %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = lshr i8 %A, 5 %C = shl i8 %A, 3 @@ -515,8 +533,9 @@ ; ; 64-LABEL: rotl1_8: ; 64: # BB#0: -; 64-NEXT: rolb %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolb %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = shl i8 %A, 1 %C = lshr i8 %A, 7 @@ -533,8 +552,9 @@ ; ; 64-LABEL: rotr1_8: ; 64: # BB#0: -; 64-NEXT: rorb %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rorb %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = lshr i8 %A, 1 %C = shl i8 %A, 7 Index: test/CodeGen/X86/rotate4.ll =================================================================== --- test/CodeGen/X86/rotate4.ll +++ test/CodeGen/X86/rotate4.ll @@ -8,8 +8,9 @@ ; CHECK-LABEL: rotate_left_32: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: roll %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: roll %cl, %eax ; CHECK-NEXT: retq %and = and i32 %b, 31 %shl = shl i32 %a, %and @@ -24,8 +25,9 @@ ; CHECK-LABEL: rotate_right_32: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorl %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: rorl %cl, %eax ; CHECK-NEXT: retq %and = and i32 %b, 31 %shl = lshr i32 %a, %and @@ -39,9 +41,10 @@ define i64 @rotate_left_64(i64 %a, i64 %b) { ; CHECK-LABEL: rotate_left_64: ; CHECK: # BB#0: -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rolq %cl, %rdi +; CHECK-NEXT: movq %rsi, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; 
CHECK-NEXT: rolq %cl, %rax ; CHECK-NEXT: retq %and = and i64 %b, 63 %shl = shl i64 %a, %and @@ -55,9 +58,10 @@ define i64 @rotate_right_64(i64 %a, i64 %b) { ; CHECK-LABEL: rotate_right_64: ; CHECK: # BB#0: -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorq %cl, %rdi +; CHECK-NEXT: movq %rsi, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: rorq %cl, %rax ; CHECK-NEXT: retq %and = and i64 %b, 63 %shl = lshr i64 %a, %and @@ -74,6 +78,7 @@ ; CHECK-LABEL: rotate_left_m32: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: roll %cl, (%rdi) ; CHECK-NEXT: retq %a = load i32, i32* %pa, align 16 @@ -91,6 +96,7 @@ ; CHECK-LABEL: rotate_right_m32: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: rorl %cl, (%rdi) ; CHECK-NEXT: retq %a = load i32, i32* %pa, align 16 @@ -107,7 +113,8 @@ define void @rotate_left_m64(i64 *%pa, i64 %b) { ; CHECK-LABEL: rotate_left_m64: ; CHECK: # BB#0: -; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: # kill: %CL %CL %RCX ; CHECK-NEXT: rolq %cl, (%rdi) ; CHECK-NEXT: retq %a = load i64, i64* %pa, align 16 @@ -124,7 +131,8 @@ define void @rotate_right_m64(i64 *%pa, i64 %b) { ; CHECK-LABEL: rotate_right_m64: ; CHECK: # BB#0: -; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: # kill: %CL %CL %RCX ; CHECK-NEXT: rorq %cl, (%rdi) ; CHECK-NEXT: retq %a = load i64, i64* %pa, align 16 @@ -145,8 +153,10 @@ ; CHECK-LABEL: rotate_left_8: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rolb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: rolb %cl, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %amt = trunc i32 %amount to i8 %sub = sub i8 0, %amt @@ -162,8 +172,10 @@ ; CHECK-LABEL: rotate_right_8: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: rorb %cl, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %amt = trunc i32 %amount to i8 %sub = sub i8 0, %amt @@ -179,8 +191,10 @@ ; CHECK-LABEL: rotate_left_16: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rolw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: rolw %cl, %ax +; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq %amt = trunc i32 %amount to i16 %sub = sub i16 0, %amt @@ -196,8 +210,10 @@ ; CHECK-LABEL: rotate_right_16: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: rorw %cl, %ax +; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq %amt = trunc i32 %amount to i16 %sub = sub i16 0, %amt @@ -213,6 +229,7 @@ ; CHECK-LABEL: rotate_left_m8: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: rolb %cl, (%rdi) ; CHECK-NEXT: retq %x = load i8, i8* %p, align 1 @@ -231,6 +248,7 @@ ; CHECK-LABEL: rotate_right_m8: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: rorb %cl, (%rdi) ; CHECK-NEXT: retq %x = load i8, i8* %p, align 1 @@ -249,6 +267,7 @@ ; CHECK-LABEL: rotate_left_m16: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: rolw %cl, (%rdi) ; CHECK-NEXT: retq %x = load i16, i16* %p, align 1 @@ -267,6 +286,7 @@ ; CHECK-LABEL: rotate_right_m16: ; CHECK: # BB#0: ; 
CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: rorw %cl, (%rdi) ; CHECK-NEXT: retq %x = load i16, i16* %p, align 1 Index: test/CodeGen/X86/sad.ll =================================================================== --- test/CodeGen/X86/sad.ll +++ test/CodeGen/X86/sad.ll @@ -600,12 +600,12 @@ ; SSE2-NEXT: paddd %xmm1, %xmm15 ; SSE2-NEXT: pxor %xmm1, %xmm15 ; SSE2-NEXT: paddd %xmm15, %xmm2 -; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm4 # 16-byte Reload -; SSE2-NEXT: movdqa %xmm4, %xmm1 -; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: paddd %xmm1, %xmm4 -; SSE2-NEXT: pxor %xmm1, %xmm4 -; SSE2-NEXT: paddd %xmm4, %xmm6 +; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: movdqa %xmm1, %xmm4 +; SSE2-NEXT: psrad $31, %xmm4 +; SSE2-NEXT: paddd %xmm4, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: paddd %xmm1, %xmm6 ; SSE2-NEXT: movdqa %xmm6, %xmm15 ; SSE2-NEXT: movdqa %xmm10, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 @@ -614,12 +614,12 @@ ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload ; SSE2-NEXT: paddd %xmm10, %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm10 -; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload -; SSE2-NEXT: movdqa %xmm6, %xmm1 -; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: paddd %xmm1, %xmm6 -; SSE2-NEXT: pxor %xmm1, %xmm6 -; SSE2-NEXT: paddd %xmm6, %xmm3 +; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: movdqa %xmm1, %xmm6 +; SSE2-NEXT: psrad $31, %xmm6 +; SSE2-NEXT: paddd %xmm6, %xmm1 +; SSE2-NEXT: pxor %xmm6, %xmm1 +; SSE2-NEXT: paddd %xmm1, %xmm3 ; SSE2-NEXT: movdqa %xmm12, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 ; SSE2-NEXT: paddd %xmm1, %xmm12 Index: test/CodeGen/X86/sar_fold64.ll =================================================================== --- test/CodeGen/X86/sar_fold64.ll +++ test/CodeGen/X86/sar_fold64.ll @@ -56,9 +56,10 @@ define i8 @all_sign_bit_ashr(i8 %x) { ; CHECK-LABEL: all_sign_bit_ashr: ; CHECK: # BB#0: -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: negb %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: negb %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %and = and i8 %x, 1 %neg = sub i8 0, %and Index: test/CodeGen/X86/schedule-x86_64.ll =================================================================== --- test/CodeGen/X86/schedule-x86_64.ll +++ test/CodeGen/X86/schedule-x86_64.ll @@ -616,62 +616,62 @@ define i32 @test_bswap32(i32 %a0) optsize { ; GENERIC-LABEL: test_bswap32: ; GENERIC: # BB#0: -; GENERIC-NEXT: bswapl %edi # sched: [2:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: bswapl %eax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_bswap32: ; ATOM: # BB#0: -; ATOM-NEXT: bswapl %edi # sched: [1:1.00] ; ATOM-NEXT: movl %edi, %eax # sched: [1:0.50] +; ATOM-NEXT: bswapl %eax # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_bswap32: ; SLM: # BB#0: -; SLM-NEXT: bswapl %edi # sched: [1:0.50] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: bswapl %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_bswap32: ; SANDY: # BB#0: -; SANDY-NEXT: bswapl %edi # sched: [2:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: bswapl %eax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_bswap32: ; HASWELL: # BB#0: -; HASWELL-NEXT: bswapl %edi # sched: [2:0.50] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: bswapl %eax # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: 
[2:1.00] ; ; BROADWELL-LABEL: test_bswap32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: bswapl %edi # sched: [2:0.50] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: bswapl %eax # sched: [2:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_bswap32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: bswapl %edi # sched: [2:0.50] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: bswapl %eax # sched: [2:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_bswap32: ; SKX: # BB#0: -; SKX-NEXT: bswapl %edi # sched: [2:0.50] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: bswapl %eax # sched: [2:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_bswap32: ; BTVER2: # BB#0: -; BTVER2-NEXT: bswapl %edi # sched: [1:0.50] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.17] +; BTVER2-NEXT: bswapl %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_bswap32: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: bswapl %edi # sched: [1:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: bswapl %eax # sched: [1:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = tail call i32 asm "bswap $0", "=r,0"(i32 %a0) nounwind ret i32 %1 @@ -679,62 +679,62 @@ define i64 @test_bswap64(i64 %a0) optsize { ; GENERIC-LABEL: test_bswap64: ; GENERIC: # BB#0: -; GENERIC-NEXT: bswapq %rdi # sched: [2:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: bswapq %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_bswap64: ; ATOM: # BB#0: -; ATOM-NEXT: bswapq %rdi # sched: [1:1.00] ; ATOM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; ATOM-NEXT: bswapq %rax # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_bswap64: ; SLM: # BB#0: -; SLM-NEXT: bswapq %rdi # sched: [1:0.50] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: bswapq %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_bswap64: ; SANDY: # BB#0: -; SANDY-NEXT: bswapq %rdi # sched: [2:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: bswapq %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_bswap64: ; HASWELL: # BB#0: -; HASWELL-NEXT: bswapq %rdi # sched: [2:0.50] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: bswapq %rax # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BROADWELL-LABEL: test_bswap64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: bswapq %rdi # sched: [2:0.50] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: bswapq %rax # sched: [2:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_bswap64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: bswapq %rdi # sched: [2:0.50] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: bswapq %rax # sched: [2:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_bswap64: ; SKX: # BB#0: -; SKX-NEXT: bswapq %rdi # sched: [2:0.50] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: bswapq %rax # sched: [2:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_bswap64: ; BTVER2: # BB#0: -; BTVER2-NEXT: bswapq %rdi # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.17] +; BTVER2-NEXT: bswapq %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_bswap64: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: bswapq %rdi # sched: [1:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: bswapq %rax # sched: [1:1.00] ; ZNVER1-NEXT: retq # 
sched: [1:0.50] %1 = tail call i64 asm "bswap $0", "=r,0"(i64 %a0) nounwind ret i64 %1 Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -622,23 +622,14 @@ declare noalias i8* @_Znam(i64) noredzone define noalias i8* @test12(i64 %count) nounwind ssp noredzone { -; GENERIC-LABEL: test12: -; GENERIC: ## BB#0: ## %entry -; GENERIC-NEXT: movl $4, %ecx -; GENERIC-NEXT: movq %rdi, %rax -; GENERIC-NEXT: mulq %rcx -; GENERIC-NEXT: movq $-1, %rdi -; GENERIC-NEXT: cmovnoq %rax, %rdi -; GENERIC-NEXT: jmp __Znam ## TAILCALL -; -; ATOM-LABEL: test12: -; ATOM: ## BB#0: ## %entry -; ATOM-NEXT: movq %rdi, %rax -; ATOM-NEXT: movl $4, %ecx -; ATOM-NEXT: mulq %rcx -; ATOM-NEXT: movq $-1, %rdi -; ATOM-NEXT: cmovnoq %rax, %rdi -; ATOM-NEXT: jmp __Znam ## TAILCALL +; CHECK-LABEL: test12: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movl $4, %ecx +; CHECK-NEXT: mulq %rcx +; CHECK-NEXT: movq $-1, %rdi +; CHECK-NEXT: cmovnoq %rax, %rdi +; CHECK-NEXT: jmp __Znam ## TAILCALL ; ; MCU-LABEL: test12: ; MCU: # BB#0: # %entry @@ -827,16 +818,18 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind { ; GENERIC-LABEL: test18: ; GENERIC: ## BB#0: -; GENERIC-NEXT: cmpl $15, %edi -; GENERIC-NEXT: cmovgel %edx, %esi ; GENERIC-NEXT: movl %esi, %eax +; GENERIC-NEXT: cmpl $15, %edi +; GENERIC-NEXT: cmovgel %edx, %eax +; GENERIC-NEXT: ## kill: %AL %AL %EAX ; GENERIC-NEXT: retq ; ; ATOM-LABEL: test18: ; ATOM: ## BB#0: -; ATOM-NEXT: cmpl $15, %edi -; ATOM-NEXT: cmovgel %edx, %esi ; ATOM-NEXT: movl %esi, %eax +; ATOM-NEXT: cmpl $15, %edi +; ATOM-NEXT: cmovgel %edx, %eax +; ATOM-NEXT: ## kill: %AL %AL %EAX ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq @@ -858,16 +851,18 @@ define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) { ; CHECK-LABEL: trunc_select_miscompile: ; CHECK: ## BB#0: -; CHECK-NEXT: orb $2, %sil ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orb $2, %cl +; CHECK-NEXT: ## kill: %CL %CL %ECX +; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: trunc_select_miscompile: ; MCU: # BB#0: -; MCU-NEXT: orb $2, %dl ; MCU-NEXT: movl %edx, %ecx +; MCU-NEXT: orb $2, %cl +; MCU-NEXT: # kill: %CL %CL %ECX ; MCU-NEXT: shll %cl, %eax ; MCU-NEXT: retl %tmp1 = select i1 %cc, i32 3, i32 2 @@ -1061,10 +1056,11 @@ ; CHECK-LABEL: select_xor_1: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl $43, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorl $43, %ecx ; CHECK-NEXT: testb $1, %sil -; CHECK-NEXT: cmovnew %ax, %di -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmovnew %cx, %ax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ; ; MCU-LABEL: select_xor_1: @@ -1086,10 +1082,10 @@ define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) { ; CHECK-LABEL: select_xor_2: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: xorl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: xorl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_xor_2: @@ -1110,10 +1106,10 @@ define i32 @select_or(i32 %A, i32 %B, i8 %cond) { ; CHECK-LABEL: select_or: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: 
cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_or: @@ -1134,10 +1130,10 @@ define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) { ; CHECK-LABEL: select_or_1: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_or_1: Index: test/CodeGen/X86/select_const.ll =================================================================== --- test/CodeGen/X86/select_const.ll +++ test/CodeGen/X86/select_const.ll @@ -43,8 +43,8 @@ define i32 @select_1_or_0(i1 %cond) { ; CHECK-LABEL: select_1_or_0: ; CHECK: # BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -62,8 +62,8 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { ; CHECK-LABEL: select_1_or_0_signext: ; CHECK: # BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -95,8 +95,8 @@ define i32 @select_0_or_neg1_signext(i1 signext %cond) { ; CHECK-LABEL: select_0_or_neg1_signext: ; CHECK: # BB#0: -; CHECK-NEXT: notl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -107,9 +107,9 @@ define i32 @select_neg1_or_0(i1 %cond) { ; CHECK-LABEL: select_neg1_or_0: ; CHECK: # BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: negl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -118,8 +118,8 @@ define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_neg1_or_0_zeroext: ; CHECK: # BB#0: -; CHECK-NEXT: negl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -329,9 +329,10 @@ define i8 @select_pow2_diff(i1 zeroext %cond) { ; CHECK-LABEL: select_pow2_diff: ; CHECK: # BB#0: -; CHECK-NEXT: shlb $4, %dil -; CHECK-NEXT: orb $3, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $4, %al +; CHECK-NEXT: orb $3, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %sel = select i1 %cond, i8 19, i8 3 ret i8 %sel Index: test/CodeGen/X86/setcc-logic.ll =================================================================== --- test/CodeGen/X86/setcc-logic.ll +++ test/CodeGen/X86/setcc-logic.ll @@ -41,9 +41,10 @@ define zeroext i1 @all_sign_bits_set(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_sign_bits_set: ; CHECK: # BB#0: -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %a = icmp slt i32 %P, 0 %b = icmp slt i32 %Q, 0 @@ -66,9 +67,10 @@ define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: any_sign_bits_set: ; CHECK: # BB#0: -; CHECK-NEXT: orl %esi, %edi -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %a = icmp slt i32 %P, 0 %b = icmp slt i32 %Q, 0 Index: test/CodeGen/X86/sext-i1.ll =================================================================== --- test/CodeGen/X86/sext-i1.ll +++ 
test/CodeGen/X86/sext-i1.ll @@ -165,8 +165,8 @@ ; ; X64-LABEL: select_0_or_1s_signext: ; X64: # BB#0: -; X64-NEXT: notl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: notl %eax ; X64-NEXT: retq %not = xor i1 %cond, 1 %sext = sext i1 %not to i32 Index: test/CodeGen/X86/shift-and.ll =================================================================== --- test/CodeGen/X86/shift-and.ll +++ test/CodeGen/X86/shift-and.ll @@ -12,9 +12,10 @@ ; ; X64-LABEL: t1: ; X64: # BB#0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shll %cl, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq %shamt = and i32 %t, 31 %res = shl i32 %val, %shamt @@ -31,9 +32,10 @@ ; ; X64-LABEL: t2: ; X64: # BB#0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shll %cl, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq %shamt = and i32 %t, 63 %res = shl i32 %val, %shamt @@ -52,6 +54,7 @@ ; X64-LABEL: t3: ; X64: # BB#0: ; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: %CL %CL %ECX ; X64-NEXT: sarw %cl, {{.*}}(%rip) ; X64-NEXT: retq %shamt = and i16 %t, 31 @@ -82,9 +85,10 @@ ; ; X64-LABEL: t4: ; X64: # BB#0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrq %cl, %rsi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: %CL %CL %RCX +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shamt = and i64 %t, 63 %res = lshr i64 %val, %shamt @@ -112,9 +116,10 @@ ; ; X64-LABEL: t5: ; X64: # BB#0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrq %cl, %rsi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: %CL %CL %RCX +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shamt = and i64 %t, 191 %res = lshr i64 %val, %shamt @@ -147,7 +152,8 @@ ; ; X64-LABEL: t5ptr: ; X64: # BB#0: -; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: %CL %CL %RCX ; X64-NEXT: shrq %cl, (%rsi) ; X64-NEXT: retq %shamt = and i64 %t, 191 @@ -205,9 +211,9 @@ ; ; X64-LABEL: big_mask_constant: ; X64: # BB#0: -; X64-NEXT: shrq $7, %rdi -; X64-NEXT: andl $134217728, %edi # imm = 0x8000000 ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $7, %rax +; X64-NEXT: andl $134217728, %eax # imm = 0x8000000 ; X64-NEXT: retq %and = and i64 %x, 17179869184 ; 0x400000000 %sh = lshr i64 %and, 7 Index: test/CodeGen/X86/shift-bmi2.ll =================================================================== --- test/CodeGen/X86/shift-bmi2.ll +++ test/CodeGen/X86/shift-bmi2.ll @@ -26,8 +26,8 @@ ; ; BMI264-LABEL: shl32i: ; BMI264: # BB#0: -; BMI264-NEXT: shll $5, %edi ; BMI264-NEXT: movl %edi, %eax +; BMI264-NEXT: shll $5, %eax ; BMI264-NEXT: retq %shl = shl i32 %x, 5 ret i32 %shl @@ -69,6 +69,23 @@ } define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: shl64: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl %cl, %eax, %edx +; BMI2-NEXT: shlxl %ecx, %eax, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64: ; BMI264: # BB#0: ; BMI264-NEXT: shlxq %rsi, %rdi, %rax @@ -78,16 +95,42 @@ } define i64 @shl64i(i64 %x) nounwind uwtable readnone { +; BMI2-LABEL: shl64i: +; BMI2: # BB#0: +; BMI2-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl $7, %eax, %edx +; BMI2-NEXT: shll $7, %eax +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64i: ; BMI264: # BB#0: -; BMI264-NEXT: shlq $7, %rdi ; BMI264-NEXT: movq %rdi, %rax +; BMI264-NEXT: shlq $7, %rax ; BMI264-NEXT: retq %shl = shl i64 %x, 7 ret i64 %shl } define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: shl64p: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl (%eax), %esi +; BMI2-NEXT: movl 4(%eax), %edx +; BMI2-NEXT: shldl %cl, %esi, %edx +; BMI2-NEXT: shlxl %ecx, %esi, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64p: ; BMI264: # BB#0: ; BMI264-NEXT: shlxq %rsi, (%rdi), %rax @@ -98,6 +141,15 @@ } define i64 @shl64pi(i64* %p) nounwind uwtable readnone { +; BMI2-LABEL: shl64pi: +; BMI2: # BB#0: +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movl (%ecx), %eax +; BMI2-NEXT: movl 4(%ecx), %edx +; BMI2-NEXT: shldl $7, %eax, %edx +; BMI2-NEXT: shll $7, %eax +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64pi: ; BMI264: # BB#0: ; BMI264-NEXT: movq (%rdi), %rax @@ -141,6 +193,23 @@ } define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: lshr64: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64: ; BMI264: # BB#0: ; BMI264-NEXT: shrxq %rsi, %rdi, %rax @@ -150,6 +219,24 @@ } define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: lshr64p: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: movl (%edx), %eax +; BMI2-NEXT: movl 4(%edx), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64p: ; BMI264: # BB#0: ; BMI264-NEXT: shrxq %rsi, (%rdi), %rax @@ -192,6 +279,23 @@ } define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: ashr64: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64: ; BMI264: # BB#0: ; BMI264-NEXT: sarxq %rsi, %rdi, %rax @@ -201,6 +305,24 @@ } define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; 
BMI2-LABEL: ashr64p: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: movl (%edx), %eax +; BMI2-NEXT: movl 4(%edx), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64p: ; BMI264: # BB#0: ; BMI264-NEXT: sarxq %rsi, (%rdi), %rax @@ -227,6 +349,21 @@ } define i64 @shl64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: shl64and: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl %cl, %eax, %edx +; BMI2-NEXT: shlxl %ecx, %eax, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64and: ; BMI264: # BB#0: ; BMI264-NEXT: shlxq %rdi, %rsi, %rax @@ -253,6 +390,21 @@ } define i64 @lshr64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: lshr64and: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64and: ; BMI264: # BB#0: ; BMI264-NEXT: shrxq %rdi, %rsi, %rax @@ -279,6 +431,21 @@ } define i64 @ashr64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: ashr64and: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64and: ; BMI264: # BB#0: ; BMI264-NEXT: sarxq %rdi, %rsi, %rax Index: test/CodeGen/X86/shift-double-x86_64.ll =================================================================== --- test/CodeGen/X86/shift-double-x86_64.ll +++ test/CodeGen/X86/shift-double-x86_64.ll @@ -6,10 +6,11 @@ define i64 @test1(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test1: ; CHECK: # BB#0: -; CHECK-NEXT: andl $63, %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: andl $63, %ecx +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %and = and i64 %bits, 63 %and64 = sub i64 64, %and @@ -22,10 +23,11 @@ define i64 @test2(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test2: ; CHECK: # BB#0: -; CHECK-NEXT: andl $63, %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rdi, %rsi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: andl $63, %ecx +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shrdq %cl, %rdi, %rax ; CHECK-NEXT: retq %and = and i64 %bits, 63 %and64 = sub i64 64, %and @@ -38,9 +40,10 @@ define i64 @test3(i64 %hi, i64 %lo, i64 %bits) 
nounwind { ; CHECK-LABEL: test3: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = sub i64 64, %bits %sh_lo = lshr i64 %lo, %bits64 @@ -52,9 +55,10 @@ define i64 @test4(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test4: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rdi, %rsi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shrdq %cl, %rdi, %rax ; CHECK-NEXT: retq %bits64 = sub i64 64, %bits %sh_lo = shl i64 %hi, %bits64 @@ -66,9 +70,10 @@ define i64 @test5(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test5: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = lshr i64 %lo, 1 @@ -81,9 +86,10 @@ define i64 @test6(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test6: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shrdq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = shl i64 %lo, 1 @@ -96,9 +102,10 @@ define i64 @test7(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test7: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shrdq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = add i64 %lo, %lo Index: test/CodeGen/X86/shift-double.ll =================================================================== --- test/CodeGen/X86/shift-double.ll +++ test/CodeGen/X86/shift-double.ll @@ -26,8 +26,9 @@ ; X64-LABEL: test1: ; X64: # BB#0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shlq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: retq %shift.upgrd.1 = zext i8 %C to i64 ; [#uses=1] %Y = shl i64 %X, %shift.upgrd.1 ; [#uses=1] @@ -57,8 +58,9 @@ ; X64-LABEL: test2: ; X64: # BB#0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: sarq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %shift.upgrd.2 = zext i8 %C to i64 ; [#uses=1] %Y = ashr i64 %X, %shift.upgrd.2 ; [#uses=1] @@ -87,8 +89,9 @@ ; X64-LABEL: test3: ; X64: # BB#0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shift.upgrd.3 = zext i8 %C to i64 ; [#uses=1] %Y = lshr i64 %X, %shift.upgrd.3 ; [#uses=1] @@ -109,8 +112,9 @@ ; X64-LABEL: test4: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %shift.upgrd.4 = zext i8 %C to i32 ; [#uses=1] %X = shl i32 %A, %shift.upgrd.4 ; [#uses=1] @@ -133,8 +137,10 @@ ; X64-LABEL: test5: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shldw %cl, %si, %ax +; X64-NEXT: # kill: %AX 
%AX %EAX ; X64-NEXT: retq %shift.upgrd.6 = zext i8 %C to i16 ; [#uses=1] %X = shl i16 %A, %shift.upgrd.6 ; [#uses=1] @@ -159,8 +165,9 @@ ; X64-LABEL: test6: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrdl %cl, %esi, %eax ; X64-NEXT: retq %shift.upgrd.4 = zext i8 %C to i32 ; [#uses=1] %X = lshr i32 %A, %shift.upgrd.4 ; [#uses=1] @@ -183,8 +190,10 @@ ; X64-LABEL: test7: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrdw %cl, %si, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %shift.upgrd.6 = zext i8 %C to i16 ; [#uses=1] %X = lshr i16 %A, %shift.upgrd.6 ; [#uses=1] @@ -212,10 +221,11 @@ ; ; X64-LABEL: test8: ; X64: # BB#0: -; X64-NEXT: andb $31, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shlq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andb $31, %cl +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: retq %and = and i32 %bits, 31 %sh_prom = zext i32 %and to i64 @@ -235,10 +245,11 @@ ; ; X64-LABEL: test9: ; X64: # BB#0: -; X64-NEXT: andb $31, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: sarq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andb $31, %cl +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %and = and i32 %bits, 31 %sh_prom = zext i32 %and to i64 @@ -258,10 +269,11 @@ ; ; X64-LABEL: test10: ; X64: # BB#0: -; X64-NEXT: andb $31, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andb $31, %cl +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %and = and i32 %bits, 31 %sh_prom = zext i32 %and to i64 @@ -284,10 +296,11 @@ ; ; X64-LABEL: test11: ; X64: # BB#0: -; X64-NEXT: andl $31, %edx ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $31, %ecx +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %and = and i32 %bits, 31 %and32 = sub i32 32, %and @@ -310,10 +323,11 @@ ; ; X64-LABEL: test12: ; X64: # BB#0: -; X64-NEXT: andl $31, %edx ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %edi, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: andl $31, %ecx +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrdl %cl, %edi, %eax ; X64-NEXT: retq %and = and i32 %bits, 31 %and32 = sub i32 32, %and @@ -335,8 +349,9 @@ ; X64-LABEL: test13: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = sub i32 32, %bits %sh_lo = lshr i32 %lo, %bits32 @@ -357,8 +372,9 @@ ; X64-LABEL: test14: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %edi, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrdl %cl, %edi, %eax ; X64-NEXT: retq %bits32 = sub i32 32, %bits %sh_lo = shl i32 %hi, %bits32 @@ -379,8 +395,9 @@ ; X64-LABEL: test15: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = xor i32 %bits, 31 %lo2 = lshr i32 %lo, 1 @@ -402,8 +419,9 @@ ; X64-LABEL: test16: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrdl 
%cl, %esi, %eax ; X64-NEXT: retq %bits32 = xor i32 %bits, 31 %lo2 = shl i32 %lo, 1 @@ -425,8 +443,9 @@ ; X64-LABEL: test17: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrdl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = xor i32 %bits, 31 %lo2 = add i32 %lo, %lo Index: test/CodeGen/X86/sret-implicit.ll =================================================================== --- test/CodeGen/X86/sret-implicit.ll +++ test/CodeGen/X86/sret-implicit.ll @@ -10,7 +10,7 @@ } ; X64-LABEL: sret_void -; X64-DAG: movl $0, (%rdi) +; X64-DAG: movl $0, (%rax) ; X64-DAG: movq %rdi, %rax ; X64: retq @@ -24,7 +24,7 @@ } ; X64-LABEL: sret_demoted -; X64-DAG: movq $0, (%rdi) +; X64-DAG: movq $0, (%rax) ; X64-DAG: movq %rdi, %rax ; X64: retq Index: test/CodeGen/X86/sse1.ll =================================================================== --- test/CodeGen/X86/sse1.ll +++ test/CodeGen/X86/sse1.ll @@ -192,9 +192,10 @@ ; ; X64-LABEL: PR30512: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edi, %edi ; X64-NEXT: cmpl %r9d, %esi -; X64-NEXT: sete %al +; X64-NEXT: sete %dil ; X64-NEXT: xorl %esi, %esi ; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %edx ; X64-NEXT: sete %sil @@ -204,11 +205,10 @@ ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %r8d ; X64-NEXT: sete %cl -; X64-NEXT: movl %ecx, 12(%rdi) -; X64-NEXT: movl %edx, 8(%rdi) -; X64-NEXT: movl %esi, 4(%rdi) -; X64-NEXT: movl %eax, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %ecx, 12(%rax) +; X64-NEXT: movl %edx, 8(%rax) +; X64-NEXT: movl %esi, 4(%rax) +; X64-NEXT: movl %edi, (%rax) ; X64-NEXT: retq %cmp = icmp eq <4 x i32> %x, %y %zext = zext <4 x i1> %cmp to <4 x i32> Index: test/CodeGen/X86/sse3-schedule.ll =================================================================== --- test/CodeGen/X86/sse3-schedule.ll +++ test/CodeGen/X86/sse3-schedule.ll @@ -473,8 +473,8 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; GENERIC-LABEL: test_monitor: ; GENERIC: # BB#0: -; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: monitor # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -487,57 +487,57 @@ ; ; SLM-LABEL: test_monitor: ; SLM: # BB#0: -; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] ; SLM-NEXT: movl %esi, %ecx # sched: [1:0.50] +; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] ; SLM-NEXT: monitor # sched: [100:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_monitor: ; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SANDY-NEXT: movl %esi, %ecx # sched: [1:0.33] +; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SANDY-NEXT: monitor # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_monitor: ; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] +; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: monitor # sched: [100:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BROADWELL-LABEL: test_monitor: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] +; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BROADWELL-NEXT: monitor # sched: [100:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_monitor: ; SKYLAKE: # BB#0: -; 
SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: monitor # sched: [100:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_monitor: ; SKX: # BB#0: -; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-NEXT: monitor # sched: [100:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_monitor: ; BTVER2: # BB#0: -; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.17] +; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BTVER2-NEXT: monitor # sched: [100:0.17] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_monitor: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25] +; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: monitor # sched: [100:?] ; ZNVER1-NEXT: retq # sched: [1:0.50] tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2) @@ -782,71 +782,71 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; GENERIC-LABEL: test_mwait: ; GENERIC: # BB#0: -; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] ; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] ; GENERIC-NEXT: mwait # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_mwait: ; ATOM: # BB#0: -; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50] +; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; ATOM-NEXT: mwait # sched: [46:23.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_mwait: ; SLM: # BB#0: -; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; SLM-NEXT: movl %esi, %eax # sched: [1:0.50] +; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; SLM-NEXT: mwait # sched: [100:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_mwait: ; SANDY: # BB#0: -; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] ; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33] +; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] ; SANDY-NEXT: mwait # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mwait: ; HASWELL: # BB#0: -; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25] +; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; HASWELL-NEXT: mwait # sched: [20:2.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BROADWELL-LABEL: test_mwait: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; BROADWELL-NEXT: mwait # sched: [100:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mwait: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-NEXT: mwait # sched: [20:2.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mwait: ; SKX: # BB#0: -; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-NEXT: mwait # sched: [20:2.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mwait: ; BTVER2: # BB#0: -; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.17] ; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.17] +; BTVER2-NEXT: movl %edi, 
%ecx # sched: [1:0.17] ; BTVER2-NEXT: mwait # sched: [100:0.17] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_mwait: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-NEXT: mwait # sched: [100:?] ; ZNVER1-NEXT: retq # sched: [1:0.50] tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1) Index: test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll @@ -17,8 +17,8 @@ define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{ ; X64-LABEL: test_mm_crc64_u64: ; X64: # BB#0: -; X64-NEXT: crc32q %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: crc32q %rsi, %rax ; X64-NEXT: retq %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) ret i64 %res Index: test/CodeGen/X86/sse42-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-fast-isel.ll +++ test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -19,12 +19,12 @@ ; ; X64-LABEL: test_mm_cmpestra: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: seta %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: seta %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -48,12 +48,12 @@ ; ; X64-LABEL: test_mm_cmpestrc: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: setb %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: setb %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -73,8 +73,8 @@ ; ; X64-LABEL: test_mm_cmpestri: ; X64: # BB#0: -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: retq @@ -95,8 +95,8 @@ ; ; X64-LABEL: test_mm_cmpestrm: ; X64: # BB#0: -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestrm $7, %xmm1, %xmm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> @@ -122,12 +122,12 @@ ; ; X64-LABEL: test_mm_cmpestro: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: seto %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: seto %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -151,12 +151,12 @@ ; ; X64-LABEL: test_mm_cmpestrs: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: sets %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: sets %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -180,12 +180,12 @@ ; ; 
X64-LABEL: test_mm_cmpestrz: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: sete %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: sete %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -361,8 +361,8 @@ ; ; X64-LABEL: test_mm_crc32_u8: ; X64: # BB#0: -; X64-NEXT: crc32b %sil, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32b %sil, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) ret i32 %res @@ -379,8 +379,8 @@ ; ; X64-LABEL: test_mm_crc32_u16: ; X64: # BB#0: -; X64-NEXT: crc32w %si, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32w %si, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) ret i32 %res @@ -396,8 +396,8 @@ ; ; X64-LABEL: test_mm_crc32_u32: ; X64: # BB#0: -; X64-NEXT: crc32l %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32l %esi, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) ret i32 %res Index: test/CodeGen/X86/sse42-intrinsics-x86_64.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-x86_64.ll +++ test/CodeGen/X86/sse42-intrinsics-x86_64.ll @@ -9,8 +9,8 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind { ; CHECK-LABEL: crc32_64_8: ; CHECK: ## BB#0: -; CHECK-NEXT: crc32b %sil, %edi ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe] ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b) ret i64 %tmp @@ -19,8 +19,8 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: crc32_64_64: ; CHECK: ## BB#0: -; CHECK-NEXT: crc32q %rsi, %rdi ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xfe] ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) ret i64 %tmp Index: test/CodeGen/X86/sse42-schedule.ll =================================================================== --- test/CodeGen/X86/sse42-schedule.ll +++ test/CodeGen/X86/sse42-schedule.ll @@ -13,65 +13,65 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; GENERIC-LABEL: crc32_32_8: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_8: ; SLM: # BB#0: -; SLM-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SLM-NEXT: crc32b (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_32_8: ; SANDY: # BB#0: -; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; 
HASWELL-LABEL: crc32_32_8: ; HASWELL: # BB#0: -; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BROADWELL-LABEL: crc32_32_8: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_8: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_8: ; SKX: # BB#0: -; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_8: ; BTVER2: # BB#0: -; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.17] +; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_32_8: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) %2 = load i8, i8 *%a2 @@ -83,65 +83,65 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; GENERIC-LABEL: crc32_32_16: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32w %si, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32w %si, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32w (%rdx), %eax # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_16: ; SLM: # BB#0: -; SLM-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SLM-NEXT: crc32w (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SLM-NEXT: crc32w (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_32_16: ; SANDY: # BB#0: -; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32w (%rdx), %eax # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_16: ; HASWELL: # BB#0: -; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32w %si, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32w (%rdx), 
%eax # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BROADWELL-LABEL: crc32_32_16: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: crc32w %si, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_16: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_16: ; SKX: # BB#0: -; SKX-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SKX-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SKX-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_16: ; BTVER2: # BB#0: -; BTVER2-NEXT: crc32w %si, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.17] +; BTVER2-NEXT: crc32w %si, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_32_16: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: crc32w %si, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32w (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32w %si, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32w (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) %2 = load i16, i16 *%a2 @@ -153,65 +153,65 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: crc32_32_32: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32l (%rdx), %eax # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_32: ; SLM: # BB#0: -; SLM-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SLM-NEXT: crc32l (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SLM-NEXT: crc32l (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_32_32: ; SANDY: # BB#0: -; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32l (%rdx), %eax # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_32: ; HASWELL: # BB#0: -; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32l (%rdx), %eax # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BROADWELL-LABEL: crc32_32_32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] 
+; BROADWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_32: ; SKX: # BB#0: -; SKX-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SKX-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SKX-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_32: ; BTVER2: # BB#0: -; BTVER2-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.17] +; BTVER2-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_32_32: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32l (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32l (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) %2 = load i32, i32 *%a2 @@ -223,65 +223,65 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; GENERIC-LABEL: crc32_64_8: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_64_8: ; SLM: # BB#0: -; SLM-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SLM-NEXT: crc32b (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_64_8: ; SANDY: # BB#0: -; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_64_8: ; HASWELL: # BB#0: -; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BROADWELL-LABEL: crc32_64_8: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_64_8: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; 
SKYLAKE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_64_8: ; SKX: # BB#0: -; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_64_8: ; BTVER2: # BB#0: -; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.17] +; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_64_8: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1) %2 = load i8, i8 *%a2 @@ -293,65 +293,65 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: crc32_64_64: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; GENERIC-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_64_64: ; SLM: # BB#0: -; SLM-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SLM-NEXT: crc32q (%rdx), %rdi # sched: [6:1.00] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SLM-NEXT: crc32q (%rdx), %rax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_64_64: ; SANDY: # BB#0: -; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SANDY-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_64_64: ; HASWELL: # BB#0: -; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; HASWELL-NEXT: crc32q (%rdx), %rax # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BROADWELL-LABEL: crc32_64_64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_64_64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_64_64: 
; SKX: # BB#0: -; SKX-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SKX-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SKX-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_64_64: ; BTVER2: # BB#0: -; BTVER2-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; BTVER2-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.17] +; BTVER2-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; BTVER2-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_64_64: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; ZNVER1-NEXT: crc32q (%rdx), %rdi # sched: [10:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; ZNVER1-NEXT: crc32q (%rdx), %rax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) %2 = load i64, i64 *%a2 Index: test/CodeGen/X86/subcarry.ll =================================================================== --- test/CodeGen/X86/subcarry.ll +++ test/CodeGen/X86/subcarry.ll @@ -6,23 +6,23 @@ define %S @negate(%S* nocapture readonly %this) { ; CHECK-LABEL: negate: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movq (%rsi), %rax -; CHECK-NEXT: movq 8(%rsi), %rcx -; CHECK-NEXT: notq %rax -; CHECK-NEXT: addq $1, %rax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq (%rsi), %rcx +; CHECK-NEXT: movq 8(%rsi), %rdx ; CHECK-NEXT: notq %rcx -; CHECK-NEXT: adcq $0, %rcx -; CHECK-NEXT: movq 16(%rsi), %rdx +; CHECK-NEXT: addq $1, %rcx ; CHECK-NEXT: notq %rdx ; CHECK-NEXT: adcq $0, %rdx +; CHECK-NEXT: movq 16(%rsi), %rdi +; CHECK-NEXT: notq %rdi +; CHECK-NEXT: adcq $0, %rdi ; CHECK-NEXT: movq 24(%rsi), %rsi ; CHECK-NEXT: notq %rsi ; CHECK-NEXT: adcq $0, %rsi -; CHECK-NEXT: movq %rax, (%rdi) -; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %rdx, 16(%rdi) -; CHECK-NEXT: movq %rsi, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rcx, (%rax) +; CHECK-NEXT: movq %rdx, 8(%rax) +; CHECK-NEXT: movq %rdi, 16(%rax) +; CHECK-NEXT: movq %rsi, 24(%rax) ; CHECK-NEXT: retq entry: %0 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 0 @@ -63,29 +63,29 @@ define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr { ; CHECK-LABEL: sub: ; CHECK: # BB#0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: notq %rdx -; CHECK-NEXT: xorl %r10d, %r10d +; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: setb %r10b +; CHECK-NEXT: setb %dil ; CHECK-NEXT: addq $1, %rdx -; CHECK-NEXT: adcq 8(%rsi), %r10 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %r11d +; CHECK-NEXT: adcq 8(%rsi), %rdi +; CHECK-NEXT: setb %r10b +; CHECK-NEXT: movzbl %r10b, %r10d ; CHECK-NEXT: notq %rcx -; CHECK-NEXT: addq %r10, %rcx -; CHECK-NEXT: adcq 16(%rsi), %r11 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: adcq 16(%rsi), %r10 +; CHECK-NEXT: setb %dil +; CHECK-NEXT: movzbl %dil, %edi ; CHECK-NEXT: notq %r8 -; CHECK-NEXT: addq %r11, %r8 -; CHECK-NEXT: adcq 24(%rsi), %rax +; CHECK-NEXT: addq %r10, %r8 +; CHECK-NEXT: adcq 24(%rsi), %rdi ; CHECK-NEXT: notq %r9 -; CHECK-NEXT: addq %rax, %r9 -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %r8, 16(%rdi) -; CHECK-NEXT: movq %r9, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: addq %rdi, %r9 
+; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %rcx, 8(%rax) +; CHECK-NEXT: movq %r8, 16(%rax) +; CHECK-NEXT: movq %r9, 24(%rax) ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 Index: test/CodeGen/X86/swift-return.ll =================================================================== --- test/CodeGen/X86/swift-return.ll +++ test/CodeGen/X86/swift-return.ll @@ -266,9 +266,9 @@ ; CHECK-LABEL: gen7 ; CHECK: movl %edi, %eax -; CHECK: movl %edi, %edx -; CHECK: movl %edi, %ecx -; CHECK: movl %edi, %r8d +; CHECK: movl %eax, %edx +; CHECK: movl %eax, %ecx +; CHECK: movl %eax, %r8d ; CHECK: retq define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) { %v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0 @@ -280,9 +280,9 @@ ; CHECK-LABEL: gen8 ; CHECK: movq %rdi, %rax -; CHECK: movq %rdi, %rdx -; CHECK: movq %rdi, %rcx -; CHECK: movq %rdi, %r8 +; CHECK: movq %rax, %rdx +; CHECK: movq %rax, %rcx +; CHECK: movq %rax, %r8 ; CHECK: retq define swiftcc { i64, i64, i64, i64 } @gen8(i64 %key) { %v0 = insertvalue { i64, i64, i64, i64 } undef, i64 %key, 0 @@ -294,9 +294,9 @@ ; CHECK-LABEL: gen9 ; CHECK: movl %edi, %eax -; CHECK: movl %edi, %edx -; CHECK: movl %edi, %ecx -; CHECK: movl %edi, %r8d +; CHECK: movl %eax, %edx +; CHECK: movl %eax, %ecx +; CHECK: movl %eax, %r8d ; CHECK: retq define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) { %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0 @@ -306,13 +306,13 @@ ret { i8, i8, i8, i8 } %v3 } ; CHECK-LABEL: gen10 +; CHECK: movq %rdi, %rax ; CHECK: movaps %xmm0, %xmm1 ; CHECK: movaps %xmm0, %xmm2 ; CHECK: movaps %xmm0, %xmm3 -; CHECK: movq %rdi, %rax -; CHECK: movq %rdi, %rdx -; CHECK: movq %rdi, %rcx -; CHECK: movq %rdi, %r8 +; CHECK: movq %rax, %rdx +; CHECK: movq %rax, %rcx +; CHECK: movq %rax, %r8 ; CHECK: retq define swiftcc { double, double, double, double, i64, i64, i64, i64 } @gen10(double %keyd, i64 %keyi) { %v0 = insertvalue { double, double, double, double, i64, i64, i64, i64 } undef, double %keyd, 0 Index: test/CodeGen/X86/swifterror.ll =================================================================== --- test/CodeGen/X86/swifterror.ll +++ test/CodeGen/X86/swifterror.ll @@ -34,11 +34,11 @@ ; CHECK-APPLE-LABEL: caller: ; CHECK-APPLE: xorl %r12d, %r12d ; CHECK-APPLE: callq {{.*}}foo -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller: @@ -247,12 +247,12 @@ ; CHECK-APPLE: movl $1, %esi ; CHECK-APPLE: xorl %r12d, %r12d ; CHECK-APPLE: callq {{.*}}foo_sret -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12), +; CHECK-APPLE: movb 8(%rdi), ; CHECK-APPLE: movb %{{.*}}, -; CHECK-APPLE: movq %r12, %rdi ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller3: @@ -297,21 +297,21 @@ ; The first swifterror value: ; CHECK-APPLE: xorl %r12d, %r12d ; CHECK-APPLE: callq {{.*}}foo -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) ; CHECK-APPLE: callq {{.*}}free ; The second swifterror value: ; CHECK-APPLE: xorl %r12d, 
%r12d ; CHECK-APPLE: callq {{.*}}foo -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) +; CHECK-APPLE: movb %al, (%r14) ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller_with_multiple_swifterror_values: @@ -488,8 +488,8 @@ ; CHECK-i386: retl ; CHECK-APPLE-LABEL: empty_swiftcc: ; CHECK-APPLE: movl %edx, %ecx -; CHECK-APPLE: movl %edi, %eax ; CHECK-APPLE: movl %esi, %edx +; CHECK-APPLE: movl %edi, %eax ; CHECK-APPLE: retq define swiftcc {i32, i32, i32} @empty_swiftcc({i32, i32, i32} , %swift_error** swifterror %error_ptr_ref) { entry: Index: test/CodeGen/X86/system-intrinsics-xsetbv.ll =================================================================== --- test/CodeGen/X86/system-intrinsics-xsetbv.ll +++ test/CodeGen/X86/system-intrinsics-xsetbv.ll @@ -11,8 +11,8 @@ ; CHECK64-LABEL: test_xsetbv ; CHECK64: movl %edx, %eax -; CHECK64: movl %edi, %ecx ; CHECK64: movl %esi, %edx +; CHECK64: movl %edi, %ecx ; CHECK64: xsetbv ; CHECK64: ret Index: test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll @@ -40,10 +40,10 @@ ; X64-LABEL: test__blcic_u64: ; X64: # BB#0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: addq $1, %rdi -; X64-NEXT: andq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: addq $1, %rax +; X64-NEXT: andq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = add i64 %a0, 1 @@ -89,10 +89,10 @@ ; X64-LABEL: test__blsic_u64: ; X64: # BB#0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: subq $1, %rdi -; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: subq $1, %rax +; X64-NEXT: orq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = sub i64 %a0, 1 @@ -104,10 +104,10 @@ ; X64-LABEL: test__t1mskc_u64: ; X64: # BB#0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: addq $1, %rdi -; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: addq $1, %rax +; X64-NEXT: orq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = add i64 %a0, 1 @@ -119,10 +119,10 @@ ; X64-LABEL: test__tzmsk_u64: ; X64: # BB#0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: subq $1, %rdi -; X64-NEXT: andq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: subq $1, %rax +; X64-NEXT: andq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = sub i64 %a0, 1 Index: test/CodeGen/X86/tbm-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/tbm-intrinsics-fast-isel.ll +++ test/CodeGen/X86/tbm-intrinsics-fast-isel.ll @@ -72,10 +72,10 @@ ; X64-LABEL: test__blcic_u32: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: addl $1, %edi -; X64-NEXT: andl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: addl $1, %eax +; X64-NEXT: andl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = add i32 %a0, 1 @@ -154,10 +154,10 @@ ; X64-LABEL: test__blsic_u32: ; X64: # BB#0: ; 
X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: subl $1, %edi -; X64-NEXT: orl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: subl $1, %eax +; X64-NEXT: orl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = sub i32 %a0, 1 @@ -178,10 +178,10 @@ ; X64-LABEL: test__t1mskc_u32: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: addl $1, %edi -; X64-NEXT: orl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: addl $1, %eax +; X64-NEXT: orl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = add i32 %a0, 1 @@ -202,10 +202,10 @@ ; X64-LABEL: test__tzmsk_u32: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: subl $1, %edi -; X64-NEXT: andl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: subl $1, %eax +; X64-NEXT: andl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = sub i32 %a0, 1 Index: test/CodeGen/X86/tbm_patterns.ll =================================================================== --- test/CodeGen/X86/tbm_patterns.ll +++ test/CodeGen/X86/tbm_patterns.ll @@ -52,10 +52,10 @@ define i32 @test_x86_tbm_bextri_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u32_z2: ; CHECK: # BB#0: +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF -; CHECK-NEXT: cmovnel %edx, %esi -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = lshr i32 %a, 4 %t1 = and i32 %t0, 4095 @@ -113,10 +113,10 @@ define i64 @test_x86_tbm_bextri_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u64_z2: ; CHECK: # BB#0: +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF -; CHECK-NEXT: cmovneq %rdx, %rsi -; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = lshr i64 %a, 4 %t1 = and i64 %t0, 4095 @@ -151,11 +151,11 @@ define i32 @test_x86_tbm_blcfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcfill_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: testl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: testl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = and i32 %t0, %a @@ -190,10 +190,10 @@ define i64 @test_x86_tbm_blcfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcfill_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: testq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: testq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = and i64 %t0, %a @@ -230,12 +230,12 @@ define i32 @test_x86_tbm_blci_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blci_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: 
cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 1, %a %t1 = xor i32 %t0, -1 @@ -273,11 +273,11 @@ define i64 @test_x86_tbm_blci_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blci_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 1, %a %t1 = xor i64 %t0, -1 @@ -335,12 +335,12 @@ define i32 @test_x86_tbm_blcic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcic_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: incl %edi -; CHECK-NEXT: testl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: incl %edi +; CHECK-NEXT: testl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, 1 @@ -378,12 +378,12 @@ define i64 @test_x86_tbm_blcic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcic_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: testq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: testq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, 1 @@ -419,11 +419,11 @@ define i32 @test_x86_tbm_blcmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcmsk_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: xorl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: xorl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = xor i32 %t0, %a @@ -458,10 +458,10 @@ define i64 @test_x86_tbm_blcmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcmsk_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: xorq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: xorq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = xor i64 %t0, %a @@ -496,11 +496,11 @@ define i32 @test_x86_tbm_blcs_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcs_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = or i32 %t0, %a @@ -535,10 +535,10 @@ define i64 @test_x86_tbm_blcs_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcs_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = or i64 
%t0, %a @@ -573,11 +573,11 @@ define i32 @test_x86_tbm_blsfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsfill_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal -1(%rdi), %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: leal -1(%rdi), %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, -1 %t1 = or i32 %t0, %a @@ -612,10 +612,10 @@ define i64 @test_x86_tbm_blsfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsfill_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq -1(%rdi), %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq -1(%rdi), %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, -1 %t1 = or i64 %t0, %a @@ -652,12 +652,12 @@ define i32 @test_x86_tbm_blsic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsic_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: decl %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: orl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, -1 @@ -695,12 +695,12 @@ define i64 @test_x86_tbm_blsic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsic_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: decq %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: decq %rdi +; CHECK-NEXT: orq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, -1 @@ -739,12 +739,12 @@ define i32 @test_x86_tbm_t1mskc_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_t1mskc_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: incl %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: incl %edi +; CHECK-NEXT: orl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, 1 @@ -783,12 +783,12 @@ define i64 @test_x86_tbm_t1mskc_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_t1mskc_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: orq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, 1 @@ -827,12 +827,12 @@ define i32 @test_x86_tbm_tzmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_tzmsk_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: decl %edi -; CHECK-NEXT: testl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: testl 
%edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, -1 @@ -871,12 +871,12 @@ define i64 @test_x86_tbm_tzmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_tzmsk_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: decq %rdi -; CHECK-NEXT: testq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: decq %rdi +; CHECK-NEXT: testq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, -1 Index: test/CodeGen/X86/twoaddr-lea.ll =================================================================== --- test/CodeGen/X86/twoaddr-lea.ll +++ test/CodeGen/X86/twoaddr-lea.ll @@ -11,8 +11,8 @@ define i32 @test1(i32 %X) nounwind { ; CHECK-LABEL: test1: -; CHECK-NOT: mov -; CHECK: leal 1(%rdi) +; CHECK-NOT: mov FIXME: this check now fails, a 'movl %edi, %eax' copy is emitted +; CHECK: leal 1(%rax) %Z = add i32 %X, 1 store volatile i32 %Z, i32* @G ret i32 %X Index: test/CodeGen/X86/umul-with-overflow.ll =================================================================== --- test/CodeGen/X86/umul-with-overflow.ll +++ test/CodeGen/X86/umul-with-overflow.ll @@ -15,8 +15,8 @@ ; ; X64-LABEL: a: ; X64: # BB#0: -; X64-NEXT: movl $3, %ecx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $3, %ecx ; X64-NEXT: mull %ecx ; X64-NEXT: seto %al ; X64-NEXT: retq Index: test/CodeGen/X86/urem-power-of-two.ll =================================================================== --- test/CodeGen/X86/urem-power-of-two.ll +++ test/CodeGen/X86/urem-power-of-two.ll @@ -14,8 +14,8 @@ ; ; X64-LABEL: const_pow_2: ; X64: # BB#0: -; X64-NEXT: andl $31, %edi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andl $31, %eax ; X64-NEXT: retq %urem = urem i64 %x, 32 ret i64 %urem @@ -35,8 +35,9 @@ ; ; X64-LABEL: shift_left_pow_2: ; X64: # BB#0: -; X64-NEXT: movl $1, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: # kill: %CL %CL %ECX ; X64-NEXT: shll %cl, %eax ; X64-NEXT: addl $33554431, %eax # imm = 0x1FFFFFF ; X64-NEXT: andl %edi, %eax @@ -61,8 +62,9 @@ ; ; X64-LABEL: shift_right_pow_2: ; X64: # BB#0: -; X64-NEXT: movl $32768, %eax # imm = 0x8000 ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $32768, %eax # imm = 0x8000 +; X64-NEXT: # kill: %CL %CL %ECX ; X64-NEXT: shrl %cl, %eax ; X64-NEXT: decl %eax ; X64-NEXT: andl %edi, %eax Index: test/CodeGen/X86/use-add-flags.ll =================================================================== --- test/CodeGen/X86/use-add-flags.ll +++ test/CodeGen/X86/use-add-flags.ll @@ -10,16 +10,16 @@ define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { ; LNX-LABEL: test1: ; LNX: # BB#0: -; LNX-NEXT: addl (%rdi), %esi -; LNX-NEXT: cmovnsl %ecx, %edx ; LNX-NEXT: movl %edx, %eax +; LNX-NEXT: addl (%rdi), %esi +; LNX-NEXT: cmovnsl %ecx, %eax ; LNX-NEXT: retq ; ; WIN-LABEL: test1: ; WIN: # BB#0: -; WIN-NEXT: addl (%rcx), %edx -; WIN-NEXT: cmovnsl %r9d, %r8d ; WIN-NEXT: movl %r8d, %eax +; WIN-NEXT: addl (%rcx), %edx +; WIN-NEXT: cmovnsl %r9d, %eax ; WIN-NEXT: retq %tmp2 = load i32, i32* %x, align 4 ; [#uses=1] %tmp4 = add i32 %tmp2, %y ; [#uses=1] Index: test/CodeGen/X86/vector-bitreverse.ll =================================================================== --- test/CodeGen/X86/vector-bitreverse.ll +++ test/CodeGen/X86/vector-bitreverse.ll @@ -11,38 +11,40 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind { ; SSE-LABEL: test_bitreverse_i8: ; SSE: # BB#0: -; SSE-NEXT: rolb $4, %dil
-; SSE-NEXT: movl %edi, %eax -; SSE-NEXT: andb $51, %al -; SSE-NEXT: shlb $2, %al -; SSE-NEXT: andb $-52, %dil -; SSE-NEXT: shrb $2, %dil -; SSE-NEXT: orb %al, %dil -; SSE-NEXT: movl %edi, %eax -; SSE-NEXT: andb $85, %al -; SSE-NEXT: addb %al, %al -; SSE-NEXT: andb $-86, %dil -; SSE-NEXT: shrb %dil -; SSE-NEXT: orb %al, %dil ; SSE-NEXT: movl %edi, %eax +; SSE-NEXT: rolb $4, %al +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: andb $51, %cl +; SSE-NEXT: shlb $2, %cl +; SSE-NEXT: andb $-52, %al +; SSE-NEXT: shrb $2, %al +; SSE-NEXT: orb %cl, %al +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: andb $85, %cl +; SSE-NEXT: addb %cl, %cl +; SSE-NEXT: andb $-86, %al +; SSE-NEXT: shrb %al +; SSE-NEXT: orb %cl, %al +; SSE-NEXT: # kill: %AL %AL %EAX ; SSE-NEXT: retq ; ; AVX-LABEL: test_bitreverse_i8: ; AVX: # BB#0: -; AVX-NEXT: rolb $4, %dil -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: andb $51, %al -; AVX-NEXT: shlb $2, %al -; AVX-NEXT: andb $-52, %dil -; AVX-NEXT: shrb $2, %dil -; AVX-NEXT: orb %al, %dil -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: andb $85, %al -; AVX-NEXT: addb %al, %al -; AVX-NEXT: andb $-86, %dil -; AVX-NEXT: shrb %dil -; AVX-NEXT: orb %al, %dil ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: rolb $4, %al +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: andb $51, %cl +; AVX-NEXT: shlb $2, %cl +; AVX-NEXT: andb $-52, %al +; AVX-NEXT: shrb $2, %al +; AVX-NEXT: orb %cl, %al +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: andb $85, %cl +; AVX-NEXT: addb %cl, %cl +; AVX-NEXT: andb $-86, %al +; AVX-NEXT: shrb %al +; AVX-NEXT: orb %cl, %al +; AVX-NEXT: # kill: %AL %AL %EAX ; AVX-NEXT: retq ; ; XOP-LABEL: test_bitreverse_i8: Index: test/CodeGen/X86/vector-blend.ll =================================================================== --- test/CodeGen/X86/vector-blend.ll +++ test/CodeGen/X86/vector-blend.ll @@ -358,30 +358,30 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { ; SSE2-LABEL: vsel_double8: ; SSE2: # BB#0: # %entry +; SSE2-NEXT: movaps %xmm7, %xmm3 +; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSE2-NEXT: movapd %xmm4, %xmm0 -; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movapd %xmm6, %xmm2 -; SSE2-NEXT: movaps %xmm7, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_double8: ; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movaps %xmm7, %xmm3 +; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSSE3-NEXT: movapd %xmm4, %xmm0 -; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movapd %xmm6, %xmm2 -; SSSE3-NEXT: movaps %xmm7, %xmm3 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_double8: ; SSE41: # BB#0: # %entry +; SSE41-NEXT: movaps %xmm7, %xmm3 +; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1] ; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1] -; SSE41-NEXT: movaps %xmm5, %xmm1 -; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq ; ; AVX-LABEL: vsel_double8: @@ -397,30 +397,30 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) { ; SSE2-LABEL: vsel_i648: ; SSE2: # BB#0: # %entry +; SSE2-NEXT: movaps %xmm7, %xmm3 +; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSE2-NEXT: movapd %xmm4, %xmm0 -; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movapd %xmm6, %xmm2 -; SSE2-NEXT: movaps %xmm7, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_i648: ; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movaps %xmm7, %xmm3 
+; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSSE3-NEXT: movapd %xmm4, %xmm0 -; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movapd %xmm6, %xmm2 -; SSSE3-NEXT: movaps %xmm7, %xmm3 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_i648: ; SSE41: # BB#0: # %entry +; SSE41-NEXT: movaps %xmm7, %xmm3 +; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7] ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7] -; SSE41-NEXT: movaps %xmm5, %xmm1 -; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq ; ; AVX1-LABEL: vsel_i648: @@ -554,22 +554,22 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) { ; SSE2-LABEL: constant_blendvpd_avx: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSE2-NEXT: movapd %xmm3, %xmm1 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: constant_blendvpd_avx: ; SSSE3: # BB#0: # %entry -; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSSE3-NEXT: movaps %xmm2, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSSE3-NEXT: movapd %xmm3, %xmm1 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: constant_blendvpd_avx: ; SSE41: # BB#0: # %entry -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1] ; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1] ; SSE41-NEXT: retq ; ; AVX-LABEL: constant_blendvpd_avx: @@ -767,20 +767,20 @@ define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) { ; SSE2-LABEL: blend_shufflevector_4xi64: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: blend_shufflevector_4xi64: ; SSSE3: # BB#0: # %entry -; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: movaps %xmm3, %xmm1 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: blend_shufflevector_4xi64: ; SSE41: # BB#0: # %entry -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: movaps %xmm3, %xmm1 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: blend_shufflevector_4xi64: Index: test/CodeGen/X86/vector-compare-results.ll =================================================================== --- test/CodeGen/X86/vector-compare-results.ll +++ test/CodeGen/X86/vector-compare-results.ll @@ -344,210 +344,210 @@ define <32 x i1> @test_cmp_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i8: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: 
andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), 
%al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i8: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE42-NEXT: pcmpgtb %xmm3, %xmm1 -; SSE42-NEXT: pextrb $15, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $13, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $11, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $9, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $7, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 
2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $3, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $15, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $13, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $11, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $9, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $7, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $3, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $15, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $13, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $11, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $9, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $7, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $5, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $3, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm1, %ecx +; SSE42-NEXT: 
andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $15, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $13, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $11, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $9, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $7, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $5, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $3, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i8: @@ -885,6 +885,7 @@ define <32 x i1> @test_cmp_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i16: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtw %xmm5, %xmm1 ; SSE2-NEXT: pcmpgtw %xmm4, %xmm0 ; SSE2-NEXT: packsswb %xmm1, %xmm0 @@ -892,209 +893,208 @@ ; SSE2-NEXT: pcmpgtw %xmm6, %xmm2 ; SSE2-NEXT: packsswb %xmm3, %xmm2 ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: 
andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: 
movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i16: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtw %xmm4, %xmm0 ; SSE42-NEXT: pcmpgtw %xmm5, %xmm1 ; SSE42-NEXT: pcmpgtw %xmm6, %xmm2 ; SSE42-NEXT: pcmpgtw %xmm7, %xmm3 -; SSE42-NEXT: pextrb $14, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax 
-; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; 
SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i16: @@ -1155,815 +1155,815 @@ define <64 x i1> @test_cmp_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; SSE2-LABEL: test_cmp_v64i8: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb %xmm4, %xmm0 ; SSE2-NEXT: pcmpgtb %xmm5, %xmm1 ; SSE2-NEXT: pcmpgtb %xmm6, %xmm2 ; SSE2-NEXT: pcmpgtb %xmm7, %xmm3 ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 6(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; 
SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 6(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 4(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; 
SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax -; SSE2-NEXT: retq -; -; SSE42-LABEL: test_cmp_v64i8: -; SSE42: # BB#0: -; SSE42-NEXT: pcmpgtb %xmm4, %xmm0 -; SSE42-NEXT: pcmpgtb %xmm5, %xmm1 -; SSE42-NEXT: pcmpgtb %xmm6, %xmm2 -; SSE42-NEXT: pcmpgtb %xmm7, %xmm3 -; SSE42-NEXT: pextrb $15, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $14, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $13, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $12, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $11, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $10, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $9, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $8, 
%xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $7, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $6, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $5, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $4, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $3, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $2, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $1, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $0, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $15, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $14, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $13, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $11, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $9, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $7, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $6, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $5, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $3, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $1, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $15, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $13, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $11, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $9, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $7, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; 
SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $3, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $15, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $13, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $11, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $9, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $7, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $3, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax -; SSE42-NEXT: retq -; -; AVX1-LABEL: test_cmp_v64i8: -; AVX1: # BB#0: -; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpextrb $15, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andb $1, 
%al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $13, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $11, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $9, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $7, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $5, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $3, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $1, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, 
%xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $15, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $13, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $11, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $9, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $7, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $5, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $3, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $1, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: movq %rdi, %rax -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: test_cmp_v64i8: +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 4(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; 
SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: retq +; +; SSE42-LABEL: test_cmp_v64i8: +; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pcmpgtb %xmm4, %xmm0 +; SSE42-NEXT: pcmpgtb %xmm5, %xmm1 +; SSE42-NEXT: pcmpgtb %xmm6, %xmm2 +; SSE42-NEXT: pcmpgtb %xmm7, %xmm3 +; SSE42-NEXT: pextrb $15, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: 
pextrb $13, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $11, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $10, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $9, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $7, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $6, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $5, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $3, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $2, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $1, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $15, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $13, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $11, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $10, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $9, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $7, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $6, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $5, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $3, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $1, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $15, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $13, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $11, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb 
%cl, 2(%rax) +; SSE42-NEXT: pextrb $9, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $7, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $5, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $3, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $15, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $13, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $11, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $9, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $7, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $5, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $3, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: retq +; +; AVX1-LABEL: test_cmp_v64i8: +; AVX1: # BB#0: +; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpextrb $15, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $13, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $11, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm1, %ecx 
+; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $9, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $7, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $5, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $3, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $1, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $15, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $13, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $11, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $9, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $7, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $5, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $3, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $1, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $15, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $13, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $11, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $9, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $7, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) 
+; AVX1-NEXT: vpextrb $5, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $3, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $1, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $15, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $13, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $11, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $9, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $7, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $5, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $3, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $1, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_cmp_v64i8: ; AVX2: # BB#0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $15, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: 
vpextrb $3, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $15, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) +; AVX2-NEXT: vpextrb $15, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $13, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $11, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $9, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $7, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $5, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $3, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $1, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $15, %xmm1, %ecx +; AVX2-NEXT: andb $1, 
%cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $13, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $11, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $9, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $7, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $3, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpextrb $15, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $13, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $11, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $9, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $7, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $3, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $15, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $13, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $11, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb 
%al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $9, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $7, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $5, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $3, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $1, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: vpextrb $15, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $13, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $11, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $9, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $7, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $3, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $15, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $13, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $11, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $9, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $7, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm0, %ecx +; 
AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $5, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $3, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $1, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -2190,6 +2190,7 @@ define <32 x i1> @test_cmp_v32f32(<32 x float> %a0, <32 x float> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32f32: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9 ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11 ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10 @@ -2213,108 +2214,108 @@ ; SSE2-NEXT: packssdw %xmm11, %xmm9 ; SSE2-NEXT: packsswb %xmm10, %xmm9 ; SSE2-NEXT: movdqa %xmm9, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 
2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb 
$1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32f32: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm15 ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm14 ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm13 @@ -2331,109 +2332,108 @@ ; SSE42-NEXT: cmpltps %xmm5, %xmm13 ; SSE42-NEXT: cmpltps %xmm6, %xmm14 ; SSE42-NEXT: cmpltps %xmm7, %xmm15 -; SSE42-NEXT: pextrb $12, %xmm15, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm15, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm15, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm15, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm14, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm14, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm14, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm14, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm13, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm13, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm13, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm13, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm12, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm12, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm12, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm12, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm11, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm11, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm11, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm11, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm10, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm10, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm10, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm10, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm9, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm9, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm9, %eax -; 
SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm9, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm8, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm8, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm8, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm8, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax -; SSE42-NEXT: retq -; -; AVX1-LABEL: test_cmp_v32f32: -; AVX1: # BB#0: -; AVX1-NEXT: vcmpltps %ymm3, %ymm7, %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7 +; SSE42-NEXT: pextrb $12, %xmm15, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm15, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm15, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm15, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm14, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm14, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm14, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm14, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm13, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm13, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm13, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm13, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm12, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm12, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm12, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm12, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm11, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm11, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm11, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm11, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm10, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm10, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm10, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm10, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm9, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm9, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm9, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm9, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb 
$12, %xmm8, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm8, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm8, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm8, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: retq +; +; AVX1-LABEL: test_cmp_v32f32: +; AVX1: # BB#0: +; AVX1-NEXT: vcmpltps %ymm3, %ymm7, %ymm3 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7 ; AVX1-NEXT: vpackssdw %xmm7, %xmm3, %xmm3 ; AVX1-NEXT: vcmpltps %ymm2, %ymm6, %ymm2 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6 @@ -3375,6 +3375,7 @@ define <32 x i1> @test_cmp_v32i32(<32 x i32> %a0, <32 x i32> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i32: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm3 ; SSE2-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm2 ; SSE2-NEXT: packssdw %xmm3, %xmm2 @@ -3390,108 +3391,108 @@ ; SSE2-NEXT: packssdw %xmm5, %xmm4 ; SSE2-NEXT: packsswb %xmm6, %xmm4 ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb 
-{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb 
%cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i32: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm1 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm2 @@ -3500,103 +3501,102 @@ ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm5 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm6 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm7 -; SSE42-NEXT: pextrb $12, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; 
SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $12, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx 
+; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i32: @@ -4301,6 +4301,7 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind { ; SSE2-LABEL: test_cmp_v64i16: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm1 ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: packsswb %xmm1, %xmm0 @@ -4314,206 +4315,206 @@ ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm6 ; SSE2-NEXT: packsswb %xmm7, %xmm6 ; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 6(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, 
%cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 6(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 4(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 
4(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, 
%al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v64i16: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm1 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm2 @@ -4522,203 +4523,203 @@ ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm5 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm6 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm7 -; SSE42-NEXT: pextrb $14, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb 
%al, 6(%rdi) -; SSE42-NEXT: pextrb $12, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $10, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $8, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $6, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $4, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $2, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $0, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $14, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $12, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $10, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $8, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $6, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $4, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $2, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $0, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $14, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $12, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $10, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $8, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $6, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $4, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $2, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $0, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $14, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $12, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $10, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $8, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $6, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $4, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $2, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $0, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $14, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm3, %eax -; SSE42-NEXT: andb $1, 
%al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $14, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $12, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $10, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $8, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $6, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $4, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $2, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $0, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb 
$14, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $12, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $10, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $8, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $6, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $4, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $2, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $0, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $14, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $12, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $10, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $8, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $6, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $4, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $2, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $0, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $14, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $12, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $10, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $8, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $6, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $4, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $2, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $0, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 
2(%rax) +; SSE42-NEXT: pextrb $6, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v64i16: ; AVX1: # BB#0: +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm8 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 @@ -4735,204 +4736,204 @@ ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vpcmpgtw %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpextrb $14, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: 
vpextrb $6, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm0, %eax -; 
AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: vpextrb $14, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm2, %ecx 
+; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; 
AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v64i16: ; AVX2: # BB#0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 ; AVX2-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 @@ -4941,199 +4942,198 @@ ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm6 ; AVX2-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3 ; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm7 -; AVX2-NEXT: vpextrb $14, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: 
movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb 
$4, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: vpextrb $14, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; 
AVX2-NEXT: vpextrb $12, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -6128,6 +6128,7 @@ ; SSE2-LABEL: test_cmp_v128i8: ; SSE2: # BB#0: ; SSE2-NEXT: pushq %rax +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm1 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm2 @@ -6137,403 +6138,403 @@ ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm6 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm7 ; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; 
SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 14(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 14(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) ; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; 
SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 12(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 12(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) ; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; 
SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 10(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 10(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 
8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 8(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 8(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, 
%al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 6(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 6(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), 
%al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 4(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 4(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; 
SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, 
%cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: popq %rcx ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v128i8: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm1 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm2 @@ -6542,395 +6543,395 @@ ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm5 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm6 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm7 -; SSE42-NEXT: pextrb $15, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $14, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $13, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $12, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $11, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $10, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $9, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $8, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $7, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $6, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $5, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $4, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $3, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $2, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $1, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $0, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $15, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $14, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb 
%al, 12(%rdi) -; SSE42-NEXT: pextrb $13, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $12, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $11, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $10, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $9, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $8, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $7, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $6, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $5, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $4, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $3, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $2, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $1, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $0, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $15, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $14, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $13, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $12, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $11, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $10, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $9, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $8, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $7, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $6, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $5, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $4, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $3, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $2, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $1, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $0, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $15, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $14, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $13, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $12, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $11, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $10, 
%xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $9, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $8, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $7, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $6, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $5, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $4, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $3, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $2, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $1, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $0, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $15, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $14, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $13, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $12, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $11, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $10, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $9, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $8, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $7, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $6, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $5, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $4, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $3, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $2, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $1, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $0, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $15, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $14, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $13, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $11, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $9, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $7, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; 
SSE42-NEXT: pextrb $6, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $5, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $3, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $1, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $15, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $13, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $11, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $9, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $7, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $3, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $15, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $13, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $11, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $9, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $7, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $3, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 
(%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $15, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $14, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $13, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $12, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $11, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $10, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $9, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $8, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $7, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $6, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $5, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $4, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $3, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $2, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $1, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $0, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $15, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $14, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $13, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $12, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $11, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $10, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $9, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $8, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $7, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $6, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $5, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $4, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $3, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $2, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $1, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $0, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; 
SSE42-NEXT: pextrb $15, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $14, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $13, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $12, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $11, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $10, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $9, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $8, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $7, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $6, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $5, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $4, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $3, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $2, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $1, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $0, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $15, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $14, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $13, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $12, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $11, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $10, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $9, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $8, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $7, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $6, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $5, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $4, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $3, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $2, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $1, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $0, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $15, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $13, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: andb 
$1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $11, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $10, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $9, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $7, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $6, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $5, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $3, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $2, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $1, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $15, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $13, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $11, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $10, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $9, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $7, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $6, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $5, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $3, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $1, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $15, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $13, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $11, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $9, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm1, 
%ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $7, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $5, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $3, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $15, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $13, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $11, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $9, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $7, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $5, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $3, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v128i8: ; AVX1: # BB#0: +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm0, %xmm8 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 @@ -6947,794 +6948,794 @@ ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vpcmpgtb %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpextrb $15, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm3, %eax -; 
AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm2, %eax -; AVX1-NEXT: andb $1, %al 
-; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: 
vpextrb $15, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $13, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $11, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $9, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $7, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $5, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $3, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $1, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $15, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $13, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $11, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: 
movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $9, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $7, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $5, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $3, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $1, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: vpextrb $15, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $14, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $13, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $12, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $11, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $10, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $9, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $8, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $7, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $6, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $5, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $4, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $3, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $2, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $1, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $0, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $15, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $14, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $13, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $12, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $11, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $10, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $9, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $8, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $7, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb 
%cl, 12(%rax) +; AVX1-NEXT: vpextrb $6, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $5, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $4, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $3, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $2, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $1, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $0, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $15, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $14, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $13, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $12, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $11, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $10, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $9, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $8, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $7, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $6, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $5, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $4, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $3, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $2, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $1, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $0, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $15, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $14, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $13, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $12, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $11, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $10, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $9, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $8, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $7, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $6, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $5, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $4, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $3, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $2, 
%xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $1, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $0, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $15, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $13, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $11, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $9, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $7, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $5, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $3, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $1, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $15, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $13, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $11, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $9, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $7, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $5, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $3, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $1, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $15, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; 
AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $13, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $11, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $9, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $7, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $5, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $3, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $1, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $15, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $13, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $11, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $9, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $7, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $5, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $3, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $1, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v128i8: ; AVX2: # BB#0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 ; AVX2-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 ; AVX2-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 ; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX2-NEXT: vpextrb $15, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; 
AVX2-NEXT: vpextrb $12, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $15, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) +; AVX2-NEXT: vpextrb $15, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $14, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $13, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $12, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $11, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $10, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $9, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: 
vpextrb $8, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $7, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $6, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $5, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $4, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $3, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $2, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $1, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $0, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $15, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $14, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $13, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $12, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $11, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $10, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $9, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $8, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $7, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $6, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $5, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $4, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $3, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $2, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $1, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $0, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX2-NEXT: vpextrb $15, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 
8(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $15, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) +; AVX2-NEXT: vpextrb $15, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $14, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $13, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $12, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $11, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $10, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $9, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $8, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $7, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $6, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $5, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $4, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $3, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $2, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $1, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $0, %xmm3, %ecx +; 
AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $15, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $14, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $13, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $12, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $11, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $10, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $9, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $8, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $7, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $6, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $5, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $4, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $3, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $2, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $1, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $0, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $15, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $15, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; 
AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) +; AVX2-NEXT: vpextrb $15, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $13, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $11, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $9, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $7, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $5, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $3, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $1, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $15, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $13, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $11, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $9, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 
4(%rax) +; AVX2-NEXT: vpextrb $7, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $3, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpextrb $15, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $13, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $11, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $9, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $7, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $3, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $15, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $13, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $11, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $9, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $7, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $5, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $3, 
%xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $1, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: vpextrb $15, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $13, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $11, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $9, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $7, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $3, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $15, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $13, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $11, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $9, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $7, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $5, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $3, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $1, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512F-LABEL: test_cmp_v128i8: ; 
AVX512F: # BB#0: +; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 @@ -7743,44 +7744,44 @@ ; AVX512F-NEXT: vpmovsxbd %xmm4, %zmm4 ; AVX512F-NEXT: vpslld $31, %zmm4, %zmm4 ; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0 -; AVX512F-NEXT: kmovw %k0, 14(%rdi) +; AVX512F-NEXT: kmovw %k0, 14(%rax) ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512F-NEXT: kmovw %k0, 12(%rdi) +; AVX512F-NEXT: kmovw %k0, 12(%rax) ; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512F-NEXT: kmovw %k0, 10(%rdi) +; AVX512F-NEXT: kmovw %k0, 10(%rax) ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, 8(%rdi) +; AVX512F-NEXT: kmovw %k0, 8(%rax) ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, 6(%rdi) +; AVX512F-NEXT: kmovw %k0, 6(%rax) ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, 4(%rdi) +; AVX512F-NEXT: kmovw %k0, 4(%rax) ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, 2(%rdi) +; AVX512F-NEXT: kmovw %k0, 2(%rax) ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rdi) -; AVX512F-NEXT: movq %rdi, %rax +; AVX512F-NEXT: kmovw %k0, (%rax) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v128i8: ; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: movq %rdi, %rax ; AVX512DQ-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 @@ -7789,39 +7790,38 @@ ; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm4 ; AVX512DQ-NEXT: vpslld $31, %zmm4, %zmm4 ; AVX512DQ-NEXT: vptestmd %zmm4, %zmm4, %k0 -; AVX512DQ-NEXT: kmovw %k0, 14(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 14(%rax) ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, 12(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 12(%rax) ; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, 10(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 10(%rax) ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 8(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 8(%rax) ; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 6(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 6(%rax) ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 4(%rax) ; 
AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 2(%rax) ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512DQ-NEXT: kmovw %k0, (%rdi) -; AVX512DQ-NEXT: movq %rdi, %rax +; AVX512DQ-NEXT: kmovw %k0, (%rax) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -7843,6 +7843,7 @@ define <32 x i1> @test_cmp_v32f64(<32 x double> %a0, <32 x double> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32f64: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: cmpltpd %xmm5, %xmm8 ; SSE2-NEXT: movapd {{[0-9]+}}(%rsp), %xmm5 @@ -7922,104 +7923,103 @@ ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1] ; SSE2-NEXT: packsswb %xmm2, %xmm1 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), 
%cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; 
SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32f64: @@ -8030,6 +8030,7 @@ ; SSE42-NEXT: pushq %r13 ; SSE42-NEXT: pushq %r12 ; SSE42-NEXT: pushq %rbx +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8 ; SSE42-NEXT: cmpltpd %xmm7, %xmm8 ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm7 @@ -8108,12 +8109,12 @@ ; SSE42-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] ; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm4[4,5,6,7] ; SSE42-NEXT: packsswb %xmm2, %xmm1 -; SSE42-NEXT: pextrb $15, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) +; SSE42-NEXT: pextrb $15, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) ; SSE42-NEXT: pextrb $13, %xmm1, %r8d ; SSE42-NEXT: pextrb $12, %xmm1, %r9d ; SSE42-NEXT: pextrb $11, %xmm1, %r10d @@ -8124,44 +8125,44 @@ ; SSE42-NEXT: pextrb $6, %xmm1, %r13d ; SSE42-NEXT: pextrb $5, %xmm1, %ebx ; SSE42-NEXT: pextrb $4, %xmm1, %ebp -; SSE42-NEXT: pextrb $3, %xmm1, %eax -; SSE42-NEXT: pextrb $2, %xmm1, %ecx -; SSE42-NEXT: pextrb $1, %xmm1, %edx -; SSE42-NEXT: pextrb $0, %xmm1, %esi +; SSE42-NEXT: pextrb $3, %xmm1, %ecx +; SSE42-NEXT: pextrb $2, %xmm1, %edx +; SSE42-NEXT: pextrb $1, %xmm1, %esi +; SSE42-NEXT: pextrb $0, %xmm1, %edi ; SSE42-NEXT: andb $1, %r8b -; SSE42-NEXT: movb %r8b, 2(%rdi) +; SSE42-NEXT: movb %r8b, 2(%rax) ; SSE42-NEXT: andb $1, %r9b -; SSE42-NEXT: movb %r9b, 2(%rdi) +; SSE42-NEXT: movb %r9b, 2(%rax) ; SSE42-NEXT: andb $1, %r10b -; SSE42-NEXT: movb %r10b, 2(%rdi) +; SSE42-NEXT: movb %r10b, 2(%rax) ; SSE42-NEXT: andb $1, %r11b -; SSE42-NEXT: movb %r11b, 2(%rdi) +; SSE42-NEXT: movb %r11b, 2(%rax) ; SSE42-NEXT: andb $1, %r14b -; SSE42-NEXT: movb %r14b, 2(%rdi) +; SSE42-NEXT: movb %r14b, 2(%rax) ; SSE42-NEXT: andb $1, %r15b -; SSE42-NEXT: movb %r15b, 2(%rdi) +; SSE42-NEXT: movb %r15b, 2(%rax) ; SSE42-NEXT: andb $1, %r12b -; SSE42-NEXT: movb %r12b, 2(%rdi) +; SSE42-NEXT: movb %r12b, 2(%rax) ; SSE42-NEXT: andb $1, %r13b -; SSE42-NEXT: movb %r13b, 2(%rdi) +; SSE42-NEXT: movb %r13b, 2(%rax) ; SSE42-NEXT: andb $1, %bl -; SSE42-NEXT: movb %bl, 2(%rdi) +; SSE42-NEXT: movb %bl, 2(%rax) ; SSE42-NEXT: andb $1, %bpl -; SSE42-NEXT: movb %bpl, 2(%rdi) -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) +; SSE42-NEXT: movb %bpl, 2(%rax) ; SSE42-NEXT: andb $1, %cl -; SSE42-NEXT: movb %cl, 2(%rdi) +; SSE42-NEXT: movb %cl, 2(%rax) ; SSE42-NEXT: andb $1, %dl -; SSE42-NEXT: movb %dl, 2(%rdi) +; SSE42-NEXT: movb %dl, 2(%rax) ; SSE42-NEXT: andb $1, %sil -; SSE42-NEXT: movb %sil, 2(%rdi) -; SSE42-NEXT: pextrb $15, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) +; SSE42-NEXT: movb %sil, 2(%rax) +; SSE42-NEXT: andb $1, %dil +; SSE42-NEXT: movb %dil, 2(%rax) +; SSE42-NEXT: pextrb $15, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb 
$14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: pextrb $13, %xmm0, %r8d ; SSE42-NEXT: pextrb $12, %xmm0, %r9d ; SSE42-NEXT: pextrb $11, %xmm0, %r10d @@ -8172,39 +8173,38 @@ ; SSE42-NEXT: pextrb $6, %xmm0, %r13d ; SSE42-NEXT: pextrb $5, %xmm0, %ebx ; SSE42-NEXT: pextrb $4, %xmm0, %ebp -; SSE42-NEXT: pextrb $3, %xmm0, %eax -; SSE42-NEXT: pextrb $2, %xmm0, %ecx -; SSE42-NEXT: pextrb $1, %xmm0, %edx -; SSE42-NEXT: pextrb $0, %xmm0, %esi +; SSE42-NEXT: pextrb $3, %xmm0, %ecx +; SSE42-NEXT: pextrb $2, %xmm0, %edx +; SSE42-NEXT: pextrb $1, %xmm0, %esi +; SSE42-NEXT: pextrb $0, %xmm0, %edi ; SSE42-NEXT: andb $1, %r8b -; SSE42-NEXT: movb %r8b, (%rdi) +; SSE42-NEXT: movb %r8b, (%rax) ; SSE42-NEXT: andb $1, %r9b -; SSE42-NEXT: movb %r9b, (%rdi) +; SSE42-NEXT: movb %r9b, (%rax) ; SSE42-NEXT: andb $1, %r10b -; SSE42-NEXT: movb %r10b, (%rdi) +; SSE42-NEXT: movb %r10b, (%rax) ; SSE42-NEXT: andb $1, %r11b -; SSE42-NEXT: movb %r11b, (%rdi) +; SSE42-NEXT: movb %r11b, (%rax) ; SSE42-NEXT: andb $1, %r14b -; SSE42-NEXT: movb %r14b, (%rdi) +; SSE42-NEXT: movb %r14b, (%rax) ; SSE42-NEXT: andb $1, %r15b -; SSE42-NEXT: movb %r15b, (%rdi) +; SSE42-NEXT: movb %r15b, (%rax) ; SSE42-NEXT: andb $1, %r12b -; SSE42-NEXT: movb %r12b, (%rdi) +; SSE42-NEXT: movb %r12b, (%rax) ; SSE42-NEXT: andb $1, %r13b -; SSE42-NEXT: movb %r13b, (%rdi) +; SSE42-NEXT: movb %r13b, (%rax) ; SSE42-NEXT: andb $1, %bl -; SSE42-NEXT: movb %bl, (%rdi) +; SSE42-NEXT: movb %bl, (%rax) ; SSE42-NEXT: andb $1, %bpl -; SSE42-NEXT: movb %bpl, (%rdi) -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) +; SSE42-NEXT: movb %bpl, (%rax) ; SSE42-NEXT: andb $1, %cl -; SSE42-NEXT: movb %cl, (%rdi) +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: andb $1, %dl -; SSE42-NEXT: movb %dl, (%rdi) +; SSE42-NEXT: movb %dl, (%rax) ; SSE42-NEXT: andb $1, %sil -; SSE42-NEXT: movb %sil, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: movb %sil, (%rax) +; SSE42-NEXT: andb $1, %dil +; SSE42-NEXT: movb %dil, (%rax) ; SSE42-NEXT: popq %rbx ; SSE42-NEXT: popq %r12 ; SSE42-NEXT: popq %r13 @@ -8987,6 +8987,7 @@ define <32 x i1> @test_cmp_v32i64(<32 x i64> %a0, <32 x i64> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i64: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0] ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 ; SSE2-NEXT: pxor %xmm8, %xmm9 @@ -9221,108 +9222,108 @@ ; SSE2-NEXT: andpd %xmm10, %xmm1 ; SSE2-NEXT: packuswb %xmm4, %xmm1 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; 
SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: movdqa %xmm9, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) 
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i64: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10 ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11 @@ -9381,103 +9382,102 @@ ; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm9[4,5,6,7] ; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm8[0,1],xmm0[2,3,4,5,6,7] ; SSE42-NEXT: packsswb %xmm3, %xmm0 -; SSE42-NEXT: pextrb $15, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $13, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $11, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $9, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $7, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; 
SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $3, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $15, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $13, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $11, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $9, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $7, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $5, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $3, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $1, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $15, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $13, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $11, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $9, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $7, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $5, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $3, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm0, 
%ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $15, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $13, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $11, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $9, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $7, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $5, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $3, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $1, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i64: Index: test/CodeGen/X86/vector-interleave.ll =================================================================== --- test/CodeGen/X86/vector-interleave.ll +++ test/CodeGen/X86/vector-interleave.ll @@ -10,6 +10,7 @@ define <64 x i16> @interleave8x8(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e, <8 x i16> %f, <8 x i16> %h, <8 x i16> %g) { ; SSE-LABEL: interleave8x8: ; SSE: # BB#0: +; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: movdqa %xmm0, %xmm8 ; SSE-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm1[0],xmm8[1],xmm1[1],xmm8[2],xmm1[2],xmm8[3],xmm1[3] ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] @@ -46,15 +47,14 @@ ; SSE-NEXT: movdqa %xmm3, %xmm4 ; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3] ; SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm6[4],xmm3[5],xmm6[5],xmm3[6],xmm6[6],xmm3[7],xmm6[7] -; SSE-NEXT: movdqa %xmm3, 112(%rdi) -; SSE-NEXT: movdqa %xmm4, 96(%rdi) -; SSE-NEXT: movdqa %xmm0, 80(%rdi) -; SSE-NEXT: movdqa %xmm7, 64(%rdi) -; SSE-NEXT: movdqa %xmm2, 48(%rdi) -; SSE-NEXT: movdqa %xmm1, 32(%rdi) -; SSE-NEXT: movdqa %xmm8, 16(%rdi) -; SSE-NEXT: movdqa %xmm5, (%rdi) -; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: movdqa %xmm3, 112(%rax) +; SSE-NEXT: movdqa %xmm4, 96(%rax) +; SSE-NEXT: movdqa %xmm0, 80(%rax) +; SSE-NEXT: movdqa %xmm7, 64(%rax) +; SSE-NEXT: movdqa %xmm2, 48(%rax) +; SSE-NEXT: movdqa %xmm1, 32(%rax) +; SSE-NEXT: movdqa %xmm8, 16(%rax) +; SSE-NEXT: movdqa %xmm5, (%rax) ; SSE-NEXT: retq ; ; AVX1-LABEL: interleave8x8: Index: test/CodeGen/X86/vector-pcmp.ll =================================================================== --- test/CodeGen/X86/vector-pcmp.ll +++ test/CodeGen/X86/vector-pcmp.ll @@ -86,14 +86,14 @@ define <1 x i128> @test_strange_type(<1 x i128> %x) { ; SSE2-LABEL: test_strange_type: 
; SSE2: # BB#0: -; SSE2-NEXT: sarq $63, %rsi -; SSE2-NEXT: movq %rsi, %xmm0 -; SSE2-NEXT: notq %rsi +; SSE2-NEXT: movq %rsi, %rdx +; SSE2-NEXT: sarq $63, %rdx +; SSE2-NEXT: movq %rdx, %xmm0 +; SSE2-NEXT: notq %rdx ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm0, %xmm1 ; SSE2-NEXT: movq %xmm1, %rax -; SSE2-NEXT: movq %rsi, %rdx ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_strange_type: Index: test/CodeGen/X86/vector-rotate-128.ll =================================================================== --- test/CodeGen/X86/vector-rotate-128.ll +++ test/CodeGen/X86/vector-rotate-128.ll @@ -351,60 +351,61 @@ ; ; SSE41-LABEL: var_rotate_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] -; SSE41-NEXT: psubw %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psubw %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm4 +; SSE41-NEXT: psllw $4, %xmm3 +; SSE41-NEXT: por %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm4 ; SSE41-NEXT: paddw %xmm4, %xmm4 -; SSE41-NEXT: movdqa %xmm3, %xmm6 +; SSE41-NEXT: movdqa %xmm1, %xmm6 ; SSE41-NEXT: psllw $8, %xmm6 -; SSE41-NEXT: movdqa %xmm3, %xmm5 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm6, %xmm5 -; SSE41-NEXT: movdqa %xmm5, %xmm1 -; SSE41-NEXT: psllw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm5, %xmm3 +; SSE41-NEXT: psllw $4, %xmm3 ; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm5 -; SSE41-NEXT: movdqa %xmm5, %xmm1 -; SSE41-NEXT: psllw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm5 +; SSE41-NEXT: movdqa %xmm5, %xmm3 +; SSE41-NEXT: psllw $2, %xmm3 ; SSE41-NEXT: paddw %xmm4, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm5 -; SSE41-NEXT: movdqa %xmm5, %xmm1 -; SSE41-NEXT: psllw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm5 +; SSE41-NEXT: movdqa %xmm5, %xmm3 +; SSE41-NEXT: psllw $1, %xmm3 ; SSE41-NEXT: paddw %xmm4, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm5 +; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm5 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 ; SSE41-NEXT: psllw $4, %xmm2 ; SSE41-NEXT: por %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: paddw %xmm1, %xmm1 -; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psrlw $8, %xmm4 ; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $4, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $2, %xmm2 -; SSE41-NEXT: paddw %xmm1, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $1, 
%xmm2 -; SSE41-NEXT: paddw %xmm1, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm3 -; SSE41-NEXT: por %xmm5, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: por %xmm5, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_rotate_v8i16: Index: test/CodeGen/X86/vector-shift-ashr-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-ashr-128.ll +++ test/CodeGen/X86/vector-shift-ashr-128.ll @@ -267,32 +267,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: paddw %xmm3, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psraw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shift-lshr-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-lshr-128.ll +++ test/CodeGen/X86/vector-shift-lshr-128.ll @@ -237,32 +237,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: paddw %xmm3, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psrlw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $2, 
%xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shift-shl-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-shl-128.ll +++ test/CodeGen/X86/vector-shift-shl-128.ll @@ -194,32 +194,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: paddw %xmm3, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psllw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psllw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psllw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psllw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psllw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psllw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psllw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shuffle-128-v2.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v2.ll +++ test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -164,8 +164,8 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) { ; SSE2-LABEL: shuffle_v2f64_22: ; SSE2: # BB#0: -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2f64_22: @@ -193,8 +193,8 @@ define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_32: ; SSE: # BB#0: -; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0] ; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_32: @@ -208,8 +208,8 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: 
shuffle_v2f64_33: ; SSE: # BB#0: -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_33: @@ -316,8 +316,8 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_02_copy: ; SSE: # BB#0: -; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_02_copy: @@ -371,26 +371,26 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_03_copy: ; SSE2: # BB#0: -; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_03_copy: ; SSE3: # BB#0: -; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSE3-NEXT: movapd %xmm2, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_03_copy: ; SSSE3: # BB#0: -; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSSE3-NEXT: movapd %xmm2, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_03_copy: ; SSE41: # BB#0: -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: shuffle_v2i64_03_copy: @@ -443,26 +443,26 @@ define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_12_copy: ; SSE2: # BB#0: -; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0] ; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_12_copy: ; SSE3: # BB#0: -; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0] ; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_12_copy: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_12_copy: ; SSE41: # BB#0: -; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_12_copy: @@ -488,8 +488,8 @@ define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_13_copy: ; SSE: # BB#0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_13_copy: @@ -516,8 +516,8 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_20_copy: ; SSE: # BB#0: -; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_20_copy: @@ -568,26 +568,26 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; 
SSE2-LABEL: shuffle_v2i64_21_copy: ; SSE2: # BB#0: -; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_21_copy: ; SSE3: # BB#0: -; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_21_copy: ; SSSE3: # BB#0: -; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSSE3-NEXT: movapd %xmm1, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_21_copy: ; SSE41: # BB#0: -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: shuffle_v2i64_21_copy: @@ -640,26 +640,26 @@ define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_30_copy: ; SSE2: # BB#0: -; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] ; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_30_copy: ; SSE3: # BB#0: -; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] ; SSE3-NEXT: movapd %xmm2, %xmm0 +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_30_copy: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_30_copy: ; SSE41: # BB#0: -; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_30_copy: @@ -686,8 +686,8 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_31_copy: ; SSE: # BB#0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_31_copy: Index: test/CodeGen/X86/vector-shuffle-combining-sse4a.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining-sse4a.ll +++ test/CodeGen/X86/vector-shuffle-combining-sse4a.ll @@ -33,8 +33,8 @@ define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) { ; SSSE3-LABEL: combine_insertqi_pshufb_16i8: ; SSSE3: # BB#0: -; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: retq ; ; SSE42-LABEL: combine_insertqi_pshufb_16i8: @@ -54,8 +54,8 @@ define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) { ; SSSE3-LABEL: combine_insertqi_pshufb_8i16: ; SSSE3: # BB#0: -; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: retq ; ; SSE42-LABEL: 
combine_insertqi_pshufb_8i16: Index: test/CodeGen/X86/vector-shuffle-combining-ssse3.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -606,8 +606,8 @@ define <16 x i8> @combine_unpckl_arg1_pshufb(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: combine_unpckl_arg1_pshufb: ; SSE: # BB#0: -; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero ; SSE-NEXT: retq ; ; AVX-LABEL: combine_unpckl_arg1_pshufb: Index: test/CodeGen/X86/vector-shuffle-combining.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining.ll +++ test/CodeGen/X86/vector-shuffle-combining.ll @@ -1705,8 +1705,8 @@ define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_test1b: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,0] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,0] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test1b: @@ -1721,8 +1721,8 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) { ; SSE2-LABEL: combine_test2b: ; SSE2: # BB#0: -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: combine_test2b: @@ -1776,8 +1776,8 @@ define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_test4b: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test4b: @@ -2846,30 +2846,30 @@ define <8 x float> @PR22412(<8 x float> %a, <8 x float> %b) { ; SSE2-LABEL: PR22412: ; SSE2: # BB#0: # %entry +; SSE2-NEXT: movaps %xmm3, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSE2-NEXT: movapd %xmm2, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] -; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,2] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[3,2] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: PR22412: ; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movaps %xmm3, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSSE3-NEXT: movapd %xmm2, %xmm0 -; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] -; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] -; SSSE3-NEXT: movaps %xmm3, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,2] +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[3,2] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: PR22412: ; SSE41: # BB#0: # %entry -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] -; SSE41-NEXT: movapd %xmm0, %xmm1 -; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[3,2] -; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2] -; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] +; SSE41-NEXT: movapd %xmm0, %xmm2 +; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[3,2] +; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[3,2] +; SSE41-NEXT: movaps %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: PR22412: Index: 
test/CodeGen/X86/vector-zext.ll =================================================================== --- test/CodeGen/X86/vector-zext.ll +++ test/CodeGen/X86/vector-zext.ll @@ -2100,6 +2100,7 @@ define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) { ; SSE2-LABEL: zext_32i8_to_32i32: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] @@ -2119,19 +2120,19 @@ ; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE2-NEXT: movdqa %xmm1, 112(%rdi) -; SSE2-NEXT: movdqa %xmm4, 96(%rdi) -; SSE2-NEXT: movdqa %xmm6, 80(%rdi) -; SSE2-NEXT: movdqa %xmm7, 64(%rdi) -; SSE2-NEXT: movdqa %xmm0, 48(%rdi) -; SSE2-NEXT: movdqa %xmm5, 32(%rdi) -; SSE2-NEXT: movdqa %xmm3, 16(%rdi) -; SSE2-NEXT: movdqa %xmm8, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movdqa %xmm1, 112(%rax) +; SSE2-NEXT: movdqa %xmm4, 96(%rax) +; SSE2-NEXT: movdqa %xmm6, 80(%rax) +; SSE2-NEXT: movdqa %xmm7, 64(%rax) +; SSE2-NEXT: movdqa %xmm0, 48(%rax) +; SSE2-NEXT: movdqa %xmm5, 32(%rax) +; SSE2-NEXT: movdqa %xmm3, 16(%rax) +; SSE2-NEXT: movdqa %xmm8, (%rax) ; SSE2-NEXT: retq ; ; SSSE3-LABEL: zext_32i8_to_32i32: ; SSSE3: # BB#0: +; SSSE3-NEXT: movq %rdi, %rax ; SSSE3-NEXT: pxor %xmm2, %xmm2 ; SSSE3-NEXT: movdqa %xmm0, %xmm3 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] @@ -2151,19 +2152,19 @@ ; SSSE3-NEXT: movdqa %xmm1, %xmm4 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSSE3-NEXT: movdqa %xmm1, 112(%rdi) -; SSSE3-NEXT: movdqa %xmm4, 96(%rdi) -; SSSE3-NEXT: movdqa %xmm6, 80(%rdi) -; SSSE3-NEXT: movdqa %xmm7, 64(%rdi) -; SSSE3-NEXT: movdqa %xmm0, 48(%rdi) -; SSSE3-NEXT: movdqa %xmm5, 32(%rdi) -; SSSE3-NEXT: movdqa %xmm3, 16(%rdi) -; SSSE3-NEXT: movdqa %xmm8, (%rdi) -; SSSE3-NEXT: movq %rdi, %rax +; SSSE3-NEXT: movdqa %xmm1, 112(%rax) +; SSSE3-NEXT: movdqa %xmm4, 96(%rax) +; SSSE3-NEXT: movdqa %xmm6, 80(%rax) +; SSSE3-NEXT: movdqa %xmm7, 64(%rax) +; SSSE3-NEXT: movdqa %xmm0, 48(%rax) +; SSSE3-NEXT: movdqa %xmm5, 32(%rax) +; SSSE3-NEXT: movdqa %xmm3, 16(%rax) +; SSSE3-NEXT: movdqa %xmm8, (%rax) ; SSSE3-NEXT: retq ; ; SSE41-LABEL: zext_32i8_to_32i32: ; SSE41: # BB#0: +; SSE41-NEXT: movq %rdi, %rax ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3] ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero @@ -2178,15 +2179,14 @@ ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; SSE41-NEXT: movdqa %xmm1, 112(%rdi) -; SSE41-NEXT: movdqa %xmm7, 96(%rdi) -; SSE41-NEXT: movdqa %xmm6, 80(%rdi) -; 
SSE41-NEXT: movdqa %xmm5, 64(%rdi) -; SSE41-NEXT: movdqa %xmm0, 48(%rdi) -; SSE41-NEXT: movdqa %xmm4, 32(%rdi) -; SSE41-NEXT: movdqa %xmm3, 16(%rdi) -; SSE41-NEXT: movdqa %xmm2, (%rdi) -; SSE41-NEXT: movq %rdi, %rax +; SSE41-NEXT: movdqa %xmm1, 112(%rax) +; SSE41-NEXT: movdqa %xmm7, 96(%rax) +; SSE41-NEXT: movdqa %xmm6, 80(%rax) +; SSE41-NEXT: movdqa %xmm5, 64(%rax) +; SSE41-NEXT: movdqa %xmm0, 48(%rax) +; SSE41-NEXT: movdqa %xmm4, 32(%rax) +; SSE41-NEXT: movdqa %xmm3, 16(%rax) +; SSE41-NEXT: movdqa %xmm2, (%rax) ; SSE41-NEXT: retq ; ; AVX1-LABEL: zext_32i8_to_32i32: Index: test/CodeGen/X86/vectorcall.ll =================================================================== --- test/CodeGen/X86/vectorcall.ll +++ test/CodeGen/X86/vectorcall.ll @@ -22,7 +22,8 @@ } ; X86-LABEL: {{^}}test_int_3@@8: ; X64-LABEL: {{^}}test_int_3@@8: -; CHECK: movl %ecx, %eax +; X64: movq %rcx, %rax +; X86: movl %ecx, %eax define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) { %s = add i32 %a, %b @@ -148,8 +149,8 @@ ret <4 x float> %0 } ; CHECK-LABEL: test_mixed_5 -; CHECK: movaps %xmm5, 16(%{{(e|r)}}sp) ; CHECK: movaps %xmm5, %xmm0 +; CHECK: movaps %xmm0, 16(%{{(e|r)}}sp) ; CHECK: ret{{[ql]}} define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, %struct.HVA4* %b) { @@ -183,12 +184,12 @@ ret void } ; CHECK-LABEL: test_mixed_7 -; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, 32(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, 16(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, (%{{rcx|eax}}) ; X64: mov{{[ql]}} %rcx, %rax +; CHECK: movaps %xmm{{[0-9]}}, 64(%{{esp|rsp}}) +; CHECK: movaps %xmm{{[0-9]}}, 48(%{{esp|rsp}}) +; CHECK: movaps %xmm{{[0-9]}}, 32(%{{esp|rsp}}) +; CHECK: movaps %xmm{{[0-9]}}, 16(%{{esp|rsp}}) +; CHECK: movaps %xmm{{[0-9]}}, (%{{esp|rsp}}) ; CHECK: ret{{[ql]}} define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) { Index: test/CodeGen/X86/vselect-minmax.ll =================================================================== --- test/CodeGen/X86/vselect-minmax.ll +++ test/CodeGen/X86/vselect-minmax.ll @@ -4830,26 +4830,27 @@ ; ; SSE4-LABEL: test121: ; SSE4: # BB#0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 ; SSE4-NEXT: movdqa %xmm7, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm3, %xmm9 ; SSE4-NEXT: movdqa %xmm6, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm2, %xmm10 -; SSE4-NEXT: movdqa %xmm5, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm5, %xmm7 +; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm9, %xmm11 +; SSE4-NEXT: pcmpgtq %xmm3, %xmm11 +; SSE4-NEXT: pcmpgtq %xmm2, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm1, %xmm5 ; SSE4-NEXT: movdqa %xmm4, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm7 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 ; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm7, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; 
AVX1-LABEL: test121: @@ -4970,30 +4971,33 @@ ; ; SSE4-LABEL: test122: ; SSE4: # BB#0: # %entry +; SSE4-NEXT: movdqa %xmm7, %xmm9 +; SSE4-NEXT: movdqa %xmm6, %xmm10 +; SSE4-NEXT: movdqa %xmm5, %xmm11 ; SSE4-NEXT: movdqa %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm3, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm9 +; SSE4-NEXT: movdqa %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm9, %xmm7 ; SSE4-NEXT: pcmpeqd %xmm12, %xmm12 -; SSE4-NEXT: pxor %xmm12, %xmm9 -; SSE4-NEXT: movdqa %xmm2, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm6, %xmm10 -; SSE4-NEXT: pxor %xmm12, %xmm10 -; SSE4-NEXT: movdqa %xmm1, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm5, %xmm11 -; SSE4-NEXT: pxor %xmm12, %xmm11 +; SSE4-NEXT: pxor %xmm12, %xmm7 +; SSE4-NEXT: movdqa %xmm2, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm10, %xmm6 +; SSE4-NEXT: pxor %xmm12, %xmm6 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm11, %xmm5 +; SSE4-NEXT: pxor %xmm12, %xmm5 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 ; SSE4-NEXT: pxor %xmm12, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test122: @@ -5105,25 +5109,27 @@ ; ; SSE4-LABEL: test123: ; SSE4: # BB#0: # %entry +; SSE4-NEXT: movdqa %xmm7, %xmm9 +; SSE4-NEXT: movdqa %xmm6, %xmm10 ; SSE4-NEXT: movdqa %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm3, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm9 -; SSE4-NEXT: movdqa %xmm2, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm6, %xmm10 -; SSE4-NEXT: movdqa %xmm1, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm5, %xmm11 +; SSE4-NEXT: movdqa %xmm3, %xmm11 +; SSE4-NEXT: pcmpgtq %xmm9, %xmm11 +; SSE4-NEXT: movdqa %xmm2, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm10, %xmm6 +; SSE4-NEXT: movdqa %xmm1, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm7 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 +; SSE4-NEXT: movdqa %xmm7, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm11, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test123: @@ -5245,31 +5251,31 @@ ; ; SSE4-LABEL: test124: ; SSE4: # BB#0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 ; SSE4-NEXT: movdqa %xmm7, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm3, %xmm9 -; SSE4-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE4-NEXT: pxor %xmm0, %xmm9 ; SSE4-NEXT: movdqa %xmm6, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm2, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 ; SSE4-NEXT: movdqa %xmm5, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm1, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 ; SSE4-NEXT: movdqa 
%xmm4, %xmm12 -; SSE4-NEXT: pcmpgtq %xmm8, %xmm12 -; SSE4-NEXT: pxor %xmm12, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: pcmpgtq %xmm3, %xmm7 +; SSE4-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE4-NEXT: pxor %xmm0, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm2, %xmm6 +; SSE4-NEXT: pxor %xmm0, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm8, %xmm4 +; SSE4-NEXT: pxor %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm12 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 +; SSE4-NEXT: movapd %xmm12, %xmm0 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test124: @@ -5381,38 +5387,40 @@ ; ; SSE4-LABEL: test125: ; SSE4: # BB#0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 -; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm3, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 ; SSE4-NEXT: movdqa %xmm7, %xmm9 -; SSE4-NEXT: pxor %xmm0, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm9 -; SSE4-NEXT: movdqa %xmm2, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 ; SSE4-NEXT: movdqa %xmm6, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm11, %xmm10 -; SSE4-NEXT: movdqa %xmm1, %xmm12 -; SSE4-NEXT: pxor %xmm0, %xmm12 ; SSE4-NEXT: movdqa %xmm5, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm12, %xmm11 -; SSE4-NEXT: movdqa %xmm8, %xmm12 -; SSE4-NEXT: pxor %xmm0, %xmm12 -; SSE4-NEXT: pxor %xmm4, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm12, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movdqa %xmm4, %xmm12 +; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm3, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm7 +; SSE4-NEXT: movdqa %xmm2, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm6 +; SSE4-NEXT: movdqa %xmm1, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: movdqa %xmm11, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm5 +; SSE4-NEXT: movdqa %xmm8, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: pxor %xmm12, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm12 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 +; SSE4-NEXT: movapd 
%xmm12, %xmm0 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test125: @@ -5442,7 +5450,7 @@ ; ; AVX2-LABEL: test125: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -5547,43 +5555,47 @@ ; ; SSE4-LABEL: test126: ; SSE4: # BB#0: # %entry +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm10 +; SSE4-NEXT: movdqa %xmm5, %xmm11 +; SSE4-NEXT: movdqa %xmm4, %xmm13 ; SSE4-NEXT: movdqa %xmm0, %xmm9 ; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm7, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: movdqa %xmm3, %xmm8 -; SSE4-NEXT: pxor %xmm0, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm8 +; SSE4-NEXT: movdqa %xmm8, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: movdqa %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm0, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm7 ; SSE4-NEXT: pcmpeqd %xmm12, %xmm12 -; SSE4-NEXT: pxor %xmm12, %xmm8 -; SSE4-NEXT: movdqa %xmm6, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: movdqa %xmm2, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm11, %xmm10 -; SSE4-NEXT: pxor %xmm12, %xmm10 -; SSE4-NEXT: movdqa %xmm5, %xmm13 -; SSE4-NEXT: pxor %xmm0, %xmm13 -; SSE4-NEXT: movdqa %xmm1, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm13, %xmm11 -; SSE4-NEXT: pxor %xmm12, %xmm11 -; SSE4-NEXT: movdqa %xmm4, %xmm13 -; SSE4-NEXT: pxor %xmm0, %xmm13 +; SSE4-NEXT: pxor %xmm12, %xmm7 +; SSE4-NEXT: movdqa %xmm10, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: movdqa %xmm2, %xmm6 +; SSE4-NEXT: pxor %xmm0, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm6 +; SSE4-NEXT: pxor %xmm12, %xmm6 +; SSE4-NEXT: movdqa %xmm11, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm5 +; SSE4-NEXT: pxor %xmm12, %xmm5 +; SSE4-NEXT: movdqa %xmm13, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 ; SSE4-NEXT: pxor %xmm9, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm13, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 ; SSE4-NEXT: pxor %xmm12, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm13 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 +; SSE4-NEXT: movapd %xmm13, %xmm0 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test126: @@ -5618,7 +5630,7 @@ ; ; AVX2-LABEL: test126: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5 ; AVX2-NEXT: vpxor %ymm4, 
%ymm1, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -5709,38 +5721,42 @@ ; ; SSE4-LABEL: test127: ; SSE4: # BB#0: # %entry +; SSE4-NEXT: movdqa %xmm7, %xmm9 +; SSE4-NEXT: movdqa %xmm6, %xmm10 +; SSE4-NEXT: movdqa %xmm5, %xmm11 +; SSE4-NEXT: movdqa %xmm4, %xmm12 ; SSE4-NEXT: movdqa %xmm0, %xmm8 ; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm7, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: movdqa %xmm3, %xmm9 -; SSE4-NEXT: pxor %xmm0, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm9 -; SSE4-NEXT: movdqa %xmm6, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: movdqa %xmm2, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm11, %xmm10 -; SSE4-NEXT: movdqa %xmm5, %xmm12 -; SSE4-NEXT: pxor %xmm0, %xmm12 -; SSE4-NEXT: movdqa %xmm1, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm12, %xmm11 -; SSE4-NEXT: movdqa %xmm4, %xmm12 -; SSE4-NEXT: pxor %xmm0, %xmm12 +; SSE4-NEXT: movdqa %xmm9, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: movdqa %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm0, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm7 +; SSE4-NEXT: movdqa %xmm10, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: movdqa %xmm2, %xmm6 +; SSE4-NEXT: pxor %xmm0, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm6 +; SSE4-NEXT: movdqa %xmm11, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm5 +; SSE4-NEXT: movdqa %xmm12, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 ; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm12, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm12 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 +; SSE4-NEXT: movapd %xmm12, %xmm0 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test127: @@ -5770,7 +5786,7 @@ ; ; AVX2-LABEL: test127: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -5876,43 +5892,45 @@ ; ; SSE4-LABEL: test128: ; SSE4: # BB#0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm3, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 ; SSE4-NEXT: movdqa %xmm7, %xmm8 -; SSE4-NEXT: pxor %xmm0, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm8 -; SSE4-NEXT: pcmpeqd %xmm12, %xmm12 -; SSE4-NEXT: pxor %xmm12, %xmm8 -; SSE4-NEXT: movdqa %xmm2, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 ; SSE4-NEXT: movdqa %xmm6, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm11, %xmm10 -; SSE4-NEXT: pxor %xmm12, %xmm10 -; SSE4-NEXT: movdqa %xmm1, %xmm13 -; SSE4-NEXT: 
pxor %xmm0, %xmm13 ; SSE4-NEXT: movdqa %xmm5, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm13, %xmm11 -; SSE4-NEXT: pxor %xmm12, %xmm11 -; SSE4-NEXT: movdqa %xmm9, %xmm13 -; SSE4-NEXT: pxor %xmm0, %xmm13 -; SSE4-NEXT: pxor %xmm4, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm13, %xmm0 +; SSE4-NEXT: movdqa %xmm4, %xmm13 +; SSE4-NEXT: movdqa %xmm0, %xmm9 +; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm3, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm7 +; SSE4-NEXT: pcmpeqd %xmm12, %xmm12 +; SSE4-NEXT: pxor %xmm12, %xmm7 +; SSE4-NEXT: movdqa %xmm2, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm6 +; SSE4-NEXT: pxor %xmm12, %xmm6 +; SSE4-NEXT: movdqa %xmm1, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: movdqa %xmm11, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm5 +; SSE4-NEXT: pxor %xmm12, %xmm5 +; SSE4-NEXT: movdqa %xmm9, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: pxor %xmm13, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 ; SSE4-NEXT: pxor %xmm12, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm13 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 +; SSE4-NEXT: movapd %xmm13, %xmm0 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test128: @@ -5947,7 +5965,7 @@ ; ; AVX2-LABEL: test128: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -8158,7 +8176,7 @@ ; ; AVX2-LABEL: test157: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -8331,7 +8349,7 @@ ; ; AVX2-LABEL: test158: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -8484,7 +8502,7 @@ ; ; AVX2-LABEL: test159: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -8658,7 +8676,7 @@ ; ; AVX2-LABEL: test160: ; 
AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -9080,7 +9098,7 @@ ; ; AVX2-LABEL: test165: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9181,7 +9199,7 @@ ; ; AVX2-LABEL: test166: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9272,7 +9290,7 @@ ; ; AVX2-LABEL: test167: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9373,7 +9391,7 @@ ; ; AVX2-LABEL: test168: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9791,7 +9809,7 @@ ; ; AVX2-LABEL: test173: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9891,7 +9909,7 @@ ; ; AVX2-LABEL: test174: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9983,7 +10001,7 @@ ; ; AVX2-LABEL: test175: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -10083,7 +10101,7 @@ ; ; AVX2-LABEL: test176: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 Index: test/CodeGen/X86/vselect.ll =================================================================== --- test/CodeGen/X86/vselect.ll +++ test/CodeGen/X86/vselect.ll @@ -487,25 +487,25 @@ define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) { ; SSE-LABEL: select_illegal: ; SSE: # BB#0: +; SSE-NEXT: 
movq %rdi, %rax ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm4 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm5 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm6 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm7 -; SSE-NEXT: movaps %xmm7, 112(%rdi) -; SSE-NEXT: movaps %xmm6, 96(%rdi) -; SSE-NEXT: movaps %xmm5, 80(%rdi) -; SSE-NEXT: movaps %xmm4, 64(%rdi) -; SSE-NEXT: movaps %xmm3, 48(%rdi) -; SSE-NEXT: movaps %xmm2, 32(%rdi) -; SSE-NEXT: movaps %xmm1, 16(%rdi) -; SSE-NEXT: movaps %xmm0, (%rdi) -; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: movaps %xmm7, 112(%rax) +; SSE-NEXT: movaps %xmm6, 96(%rax) +; SSE-NEXT: movaps %xmm5, 80(%rax) +; SSE-NEXT: movaps %xmm4, 64(%rax) +; SSE-NEXT: movaps %xmm3, 48(%rax) +; SSE-NEXT: movaps %xmm2, 32(%rax) +; SSE-NEXT: movaps %xmm1, 16(%rax) +; SSE-NEXT: movaps %xmm0, (%rax) ; SSE-NEXT: retq ; ; AVX-LABEL: select_illegal: ; AVX: # BB#0: -; AVX-NEXT: vmovaps %ymm6, %ymm2 ; AVX-NEXT: vmovaps %ymm7, %ymm3 +; AVX-NEXT: vmovaps %ymm6, %ymm2 ; AVX-NEXT: retq %sel = select <16 x i1> , <16 x double> %a, <16 x double> %b ret <16 x double> %sel Index: test/CodeGen/X86/widen_bitops-0.ll =================================================================== --- test/CodeGen/X86/widen_bitops-0.ll +++ test/CodeGen/X86/widen_bitops-0.ll @@ -15,8 +15,8 @@ ; ; X64-SSE-LABEL: and_i24_as_v3i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -34,8 +34,8 @@ ; ; X64-SSE-LABEL: xor_i24_as_v3i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -53,8 +53,8 @@ ; ; X64-SSE-LABEL: or_i24_as_v3i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -76,8 +76,8 @@ ; ; X64-SSE-LABEL: and_i24_as_v8i3: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -95,8 +95,8 @@ ; ; X64-SSE-LABEL: xor_i24_as_v8i3: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -114,8 +114,8 @@ ; ; X64-SSE-LABEL: or_i24_as_v8i3: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> Index: test/CodeGen/X86/widen_bitops-1.ll =================================================================== --- test/CodeGen/X86/widen_bitops-1.ll +++ test/CodeGen/X86/widen_bitops-1.ll @@ -15,8 +15,8 @@ ; ; X64-SSE-LABEL: and_i32_as_v4i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -34,8 +34,8 @@ ; ; X64-SSE-LABEL: xor_i32_as_v4i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -53,8 +53,8 @@ ; ; X64-SSE-LABEL: or_i32_as_v4i8: ; X64-SSE: # BB#0: -; 
X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -76,8 +76,8 @@ ; ; X64-SSE-LABEL: and_i32_as_v8i4: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -95,8 +95,8 @@ ; ; X64-SSE-LABEL: xor_i32_as_v8i4: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -114,8 +114,8 @@ ; ; X64-SSE-LABEL: or_i32_as_v8i4: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> Index: test/CodeGen/X86/widen_load-2.ll =================================================================== --- test/CodeGen/X86/widen_load-2.ll +++ test/CodeGen/X86/widen_load-2.ll @@ -21,11 +21,11 @@ ; ; X64-LABEL: add3i32: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: paddd (%rdx), %xmm0 -; X64-NEXT: pextrd $2, %xmm0, 8(%rdi) -; X64-NEXT: movq %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrd $2, %xmm0, 8(%rax) +; X64-NEXT: movq %xmm0, (%rax) ; X64-NEXT: retq %a = load %i32vec3, %i32vec3* %ap, align 16 %b = load %i32vec3, %i32vec3* %bp, align 16 @@ -54,14 +54,14 @@ ; ; X64-LABEL: add3i32_2: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: pinsrd $2, 8(%rsi), %xmm0 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: pinsrd $2, 8(%rdx), %xmm1 ; X64-NEXT: paddd %xmm0, %xmm1 -; X64-NEXT: pextrd $2, %xmm1, 8(%rdi) -; X64-NEXT: movq %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrd $2, %xmm1, 8(%rax) +; X64-NEXT: movq %xmm1, (%rax) ; X64-NEXT: retq %a = load %i32vec3, %i32vec3* %ap, align 8 %b = load %i32vec3, %i32vec3* %bp, align 8 @@ -89,14 +89,14 @@ ; ; X64-LABEL: add7i32: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddd (%rdx), %xmm0 ; X64-NEXT: paddd 16(%rdx), %xmm1 -; X64-NEXT: pextrd $2, %xmm1, 24(%rdi) -; X64-NEXT: movq %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrd $2, %xmm1, 24(%rax) +; X64-NEXT: movq %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i32vec7, %i32vec7* %ap, align 16 %b = load %i32vec7, %i32vec7* %bp, align 16 @@ -125,16 +125,16 @@ ; ; X64-LABEL: add12i32: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: movdqa 32(%rsi), %xmm2 ; X64-NEXT: paddd (%rdx), %xmm0 ; X64-NEXT: paddd 16(%rdx), %xmm1 ; X64-NEXT: paddd 32(%rdx), %xmm2 -; X64-NEXT: movdqa %xmm2, 32(%rdi) -; X64-NEXT: movdqa %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movdqa %xmm2, 32(%rax) +; X64-NEXT: movdqa %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i32vec12, %i32vec12* %ap, align 16 %b = load %i32vec12, %i32vec12* %bp, align 16 @@ -171,13 +171,13 @@ ; ; X64-LABEL: add3i16: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; X64-NEXT: pmovzxwd {{.*#+}} xmm1 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; X64-NEXT: paddd %xmm0, %xmm1 -; X64-NEXT: pextrw $4, %xmm1, 4(%rdi) +; X64-NEXT: pextrw $4, %xmm1, 4(%rax) ; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; X64-NEXT: movd %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movd %xmm1, (%rax) ; X64-NEXT: retq %a = load %i16vec3, %i16vec3* %ap, align 16 %b = load %i16vec3, %i16vec3* %bp, align 16 @@ -201,11 +201,11 @@ ; ; X64-LABEL: add4i16: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: paddw %xmm0, %xmm1 -; X64-NEXT: movq %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %xmm1, (%rax) ; X64-NEXT: retq %a = load %i16vec4, %i16vec4* %ap, align 16 %b = load %i16vec4, %i16vec4* %bp, align 16 @@ -232,13 +232,13 @@ ; ; X64-LABEL: add12i16: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddw (%rdx), %xmm0 ; X64-NEXT: paddw 16(%rdx), %xmm1 -; X64-NEXT: movq %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i16vec12, %i16vec12* %ap, align 16 %b = load %i16vec12, %i16vec12* %bp, align 16 @@ -267,16 +267,16 @@ ; ; X64-LABEL: add18i16: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: movdqa 32(%rsi), %xmm2 ; X64-NEXT: paddw (%rdx), %xmm0 ; X64-NEXT: paddw 16(%rdx), %xmm1 ; X64-NEXT: paddw 32(%rdx), %xmm2 -; X64-NEXT: movd %xmm2, 32(%rdi) -; X64-NEXT: movdqa %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movd %xmm2, 32(%rax) +; X64-NEXT: movdqa %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i16vec18, %i16vec18* %ap, align 16 %b = load %i16vec18, %i16vec18* %bp, align 16 @@ -305,13 +305,13 @@ ; ; X64-LABEL: add3i8: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; X64-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; X64-NEXT: paddd %xmm0, %xmm1 -; X64-NEXT: pextrb $8, %xmm1, 2(%rdi) +; X64-NEXT: pextrb $8, %xmm1, 2(%rax) ; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-NEXT: pextrw $0, %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrw $0, %xmm1, (%rax) ; X64-NEXT: retq %a = load %i8vec3, %i8vec3* %ap, align 16 %b = load %i8vec3, %i8vec3* %bp, align 16 @@ -341,16 +341,16 @@ ; ; X64-LABEL: add31i8: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddb (%rdx), %xmm0 ; X64-NEXT: paddb 16(%rdx), %xmm1 -; X64-NEXT: pextrb $14, %xmm1, 30(%rdi) -; X64-NEXT: pextrw $6, %xmm1, 28(%rdi) -; X64-NEXT: pextrd $2, %xmm1, 24(%rdi) -; X64-NEXT: movq %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrb $14, %xmm1, 30(%rax) +; X64-NEXT: pextrw $6, %xmm1, 28(%rax) +; X64-NEXT: pextrd $2, %xmm1, 24(%rax) +; X64-NEXT: movq %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i8vec31, %i8vec31* %ap, align 16 %b = load %i8vec31, %i8vec31* %bp, align 16 @@ -386,6 +386,7 @@ ; ; X64-LABEL: rot: ; X64: # BB#0: # %entry +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: 
movdqa {{.*#+}} xmm0 = [40606,158] ; X64-NEXT: pextrw $0, %xmm0, (%rsi) ; X64-NEXT: movb $-98, 2(%rsi) @@ -397,9 +398,8 @@ ; X64-NEXT: psrld $1, %xmm1 ; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] ; X64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-NEXT: pextrb $8, %xmm1, 2(%rdi) -; X64-NEXT: pextrw $0, %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrb $8, %xmm1, 2(%rax) +; X64-NEXT: pextrw $0, %xmm0, (%rax) ; X64-NEXT: retq entry: %storetmp = bitcast %i8vec3pack* %X to <3 x i8>* Index: test/CodeGen/X86/widen_load-3.ll =================================================================== --- test/CodeGen/X86/widen_load-3.ll +++ test/CodeGen/X86/widen_load-3.ll @@ -41,26 +41,26 @@ ; ; X64-SSE-LABEL: load7_aligned: ; X64-SSE: # BB#0: +; X64-SSE-NEXT: movq %rdi, %rax ; X64-SSE-NEXT: movaps (%rsi), %xmm0 ; X64-SSE-NEXT: movaps 16(%rsi), %xmm1 ; X64-SSE-NEXT: movaps 32(%rsi), %xmm2 -; X64-SSE-NEXT: movq 48(%rsi), %rax -; X64-SSE-NEXT: movq %rax, 48(%rdi) -; X64-SSE-NEXT: movaps %xmm2, 32(%rdi) -; X64-SSE-NEXT: movaps %xmm1, 16(%rdi) -; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: movq %rdi, %rax +; X64-SSE-NEXT: movq 48(%rsi), %rcx +; X64-SSE-NEXT: movq %rcx, 48(%rax) +; X64-SSE-NEXT: movaps %xmm2, 32(%rax) +; X64-SSE-NEXT: movaps %xmm1, 16(%rax) +; X64-SSE-NEXT: movaps %xmm0, (%rax) ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: load7_aligned: ; X64-AVX: # BB#0: +; X64-AVX-NEXT: movq %rdi, %rax ; X64-AVX-NEXT: vmovaps (%rsi), %ymm0 ; X64-AVX-NEXT: vmovaps 32(%rsi), %ymm1 -; X64-AVX-NEXT: vmovaps %ymm0, (%rdi) +; X64-AVX-NEXT: vmovaps %ymm0, (%rax) ; X64-AVX-NEXT: vextractf128 $1, %ymm1, %xmm0 -; X64-AVX-NEXT: vmovlps %xmm0, 48(%rdi) -; X64-AVX-NEXT: vmovaps %xmm1, 32(%rdi) -; X64-AVX-NEXT: movq %rdi, %rax +; X64-AVX-NEXT: vmovlps %xmm0, 48(%rax) +; X64-AVX-NEXT: vmovaps %xmm1, 32(%rax) ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq %x1 = load <7 x i64>, <7 x i64>* %x @@ -101,26 +101,26 @@ ; ; X64-SSE-LABEL: load7_unaligned: ; X64-SSE: # BB#0: +; X64-SSE-NEXT: movq %rdi, %rax ; X64-SSE-NEXT: movups (%rsi), %xmm0 ; X64-SSE-NEXT: movups 16(%rsi), %xmm1 ; X64-SSE-NEXT: movups 32(%rsi), %xmm2 -; X64-SSE-NEXT: movq 48(%rsi), %rax -; X64-SSE-NEXT: movq %rax, 48(%rdi) -; X64-SSE-NEXT: movaps %xmm2, 32(%rdi) -; X64-SSE-NEXT: movaps %xmm1, 16(%rdi) -; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: movq %rdi, %rax +; X64-SSE-NEXT: movq 48(%rsi), %rcx +; X64-SSE-NEXT: movq %rcx, 48(%rax) +; X64-SSE-NEXT: movaps %xmm2, 32(%rax) +; X64-SSE-NEXT: movaps %xmm1, 16(%rax) +; X64-SSE-NEXT: movaps %xmm0, (%rax) ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: load7_unaligned: ; X64-AVX: # BB#0: +; X64-AVX-NEXT: movq %rdi, %rax ; X64-AVX-NEXT: vmovups (%rsi), %ymm0 ; X64-AVX-NEXT: vmovups 32(%rsi), %xmm1 -; X64-AVX-NEXT: movq 48(%rsi), %rax -; X64-AVX-NEXT: movq %rax, 48(%rdi) -; X64-AVX-NEXT: vmovaps %xmm1, 32(%rdi) -; X64-AVX-NEXT: vmovaps %ymm0, (%rdi) -; X64-AVX-NEXT: movq %rdi, %rax +; X64-AVX-NEXT: movq 48(%rsi), %rcx +; X64-AVX-NEXT: movq %rcx, 48(%rax) +; X64-AVX-NEXT: vmovaps %xmm1, 32(%rax) +; X64-AVX-NEXT: vmovaps %ymm0, (%rax) ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq %x1 = load <7 x i64>, <7 x i64>* %x, align 1 Index: test/CodeGen/X86/win64_vararg.ll =================================================================== --- test/CodeGen/X86/win64_vararg.ll +++ test/CodeGen/X86/win64_vararg.ll @@ -121,10 +121,10 @@ } ; CHECK-LABEL: sret_arg: ; CHECK: pushq +; CHECK: movq %rcx, %rax ; CHECK-DAG: movq %r9, 40(%rsp) ; CHECK-DAG: movq %r8, 32(%rsp) 
; CHECK: movl 32(%rsp), %[[tmp:[^ ]*]] -; CHECK: movl %[[tmp]], (%[[sret:[^ ]*]]) -; CHECK: movq %[[sret]], %rax +; CHECK: movl %[[tmp]], (%rax) ; CHECK: popq ; CHECK: retq Index: test/CodeGen/X86/x86-cmov-converter.ll =================================================================== --- test/CodeGen/X86/x86-cmov-converter.ll +++ test/CodeGen/X86/x86-cmov-converter.ll @@ -336,14 +336,14 @@ ; CHECK-LABEL: test_cmov_memoperand: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %load = load i32, i32* %y %z = select i1 %cond, i32 %x, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%rcx), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], % ret i32 %z } @@ -353,6 +353,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %y = load i32, i32* %y.ptr %z1 = select i1 %cond, i32 %x, i32 %a @@ -362,17 +363,16 @@ ; CHECK: ja [[FALSE_BB:.*]] ; CHECK-DAG: movl %{{.*}}, %[[R1:.*]] ; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]] -; CHECK-DAG: movl %{{.*}} %[[R3:.*]] +; CHECK-DAG: movl %{{.*}} %{{.*}} ; CHECK: [[FALSE_BB]]: ; CHECK: addl ; CHECK-DAG: %[[R1]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] +; CHECK-DAG: %eax ; CHECK-DAG: addl ; CHECK-DAG: %[[R2]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] -; CHECK: movl %[[R3]], %eax +; CHECK-DAG: %eax ; CHECK: retq %s1 = add i32 %z1, %z2 %s2 = add i32 %s1, %z3 @@ -384,6 +384,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group2: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %y = load i32, i32* %y.ptr %z2 = select i1 %cond, i32 %a, i32 %x @@ -398,12 +399,11 @@ ; CHECK: addl ; CHECK-DAG: %[[R1]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] +; CHECK-DAG: %eax ; CHECK-DAG: addl ; CHECK-DAG: %[[R2]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] -; CHECK: movl %[[R3]], %eax +; CHECK-DAG: %eax ; CHECK: retq %s1 = add i32 %z1, %z2 %s2 = add i32 %s1, %z3 @@ -434,15 +434,15 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %p = select i1 %cond, i32* %x, i32* %y %load = load i32, i32* %p %z = select i1 %cond, i32 %a, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%r{{..}}), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], %eax ; CHECK: retq ret i32 %z } @@ -453,6 +453,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %load1 = load i32*, i32** %y %p = select i1 %cond, i32* %x, i32* %load1 @@ -461,9 +462,8 @@ ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] ; CHECK: movq (%r{{..}}), %[[R1:.*]] -; CHECK: movl (%[[R1]]), %[[R2:.*]] +; CHECK: movl (%[[R1]]), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R2]], %eax ; CHECK: retq ret i32 %z } @@ -475,6 +475,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %p = select i1 %cond, i32* %x, i32* %y %p2 = select i1 %cond, i32* %z, i32* %p @@ -482,9 +483,8 @@ %r = select i1 %cond, i32 %a, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%r{{..}}), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], %eax ; CHECK: retq ret i32 %r } Index: test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/x86-shrink-wrapping.ll +++ 
test/CodeGen/X86/x86-shrink-wrapping.ll @@ -70,6 +70,7 @@ ; Check that we do not perform the restore inside the loop whereas the save ; is outside. ; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: +; CHECK-DAG: movl %esi, %eax ; ; Shrink-wrapping allows to skip the prologue in the else case. ; ENABLE: testl %edi, %edi @@ -77,14 +78,14 @@ ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx +; CHECK-DAG: pushq %rbx ; ; DISABLE: testl %edi, %edi ; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; SUM is in %esi because it is coalesced with the second ; argument on the else path. -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; Next BB. @@ -98,23 +99,20 @@ ; SUM << 3. ; CHECK: shll $3, [[SUM]] ; -; Jump to epilogue. -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq %rbx +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { entry: @@ -197,19 +195,20 @@ ; Check with a more complex case that we do not have save within the loop and ; restore outside. ; CHECK-LABEL: loopInfoSaveOutsideLoop: +; CHECK-DAG: movl %esi, %eax ; ; ENABLE: testl %edi, %edi ; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx +; CHECK-DAG: pushq %rbx ; ; DISABLE: testl %edi, %edi ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: nop -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body @@ -221,22 +220,20 @@ ; CHECK: nop ; CHECK: shll $3, [[SUM]] ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq %rbx +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { entry: @@ -273,19 +270,20 @@ ; Check with a more complex case that we do not have restore within the loop and ; save outside. ; CHECK-LABEL: loopInfoRestoreOutsideLoop: +; CHECK-DAG: movl %esi, %eax ; ; ENABLE: testl %edi, %edi ; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx +; CHECK-DAG: pushq %rbx ; ; DISABLE: testl %edi, %edi ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: nop -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body @@ -296,23 +294,21 @@ ; Next BB.
; CHECK: shll $3, [[SUM]] ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq %rbx +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { entry: @@ -357,13 +353,14 @@ ; Check that we handle inline asm correctly. ; CHECK-LABEL: inlineAsm: +; CHECK-DAG: movl %esi, %eax ; ; ENABLE: testl %edi, %edi ; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx +; CHECK-DAG: pushq %rbx ; ; DISABLE: testl %edi, %edi ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] @@ -378,24 +375,22 @@ ; CHECK-NEXT: jne [[LOOP_LABEL]] ; Next BB. ; CHECK: nop -; CHECK: xorl %esi, %esi +; CHECK: xorl %eax, %eax ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq %rbx +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @inlineAsm(i32 %cond, i32 %N) { entry: Index: test/CodeGen/X86/xaluo.ll =================================================================== --- test/CodeGen/X86/xaluo.ll +++ test/CodeGen/X86/xaluo.ll @@ -719,26 +719,26 @@ define i32 @saddoselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: saddoselecti32: ; SDAG: ## BB#0: -; SDAG-NEXT: movl %edi, %eax -; SDAG-NEXT: addl %esi, %eax -; SDAG-NEXT: cmovol %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: movl %edi, %ecx +; SDAG-NEXT: addl %eax, %ecx +; SDAG-NEXT: cmovol %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: saddoselecti32: ; FAST: ## BB#0: -; FAST-NEXT: movl %edi, %eax -; FAST-NEXT: addl %esi, %eax -; FAST-NEXT: cmovol %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: movl %edi, %ecx +; FAST-NEXT: addl %eax, %ecx +; FAST-NEXT: cmovol %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: saddoselecti32: ; KNL: ## BB#0: -; KNL-NEXT: movl %edi, %eax -; KNL-NEXT: addl %esi, %eax -; KNL-NEXT: cmovol %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: addl %eax, %ecx +; KNL-NEXT: cmovol %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -749,26 +749,26 @@ define i64 @saddoselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: saddoselecti64: ; SDAG: ## BB#0: -; SDAG-NEXT: movq %rdi, %rax -; SDAG-NEXT: addq %rsi, %rax -; SDAG-NEXT: cmovoq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: movq %rdi, %rcx +; SDAG-NEXT: addq %rax, %rcx +; SDAG-NEXT: cmovoq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: saddoselecti64: ; FAST: ## BB#0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: addq %rsi, %rax -; FAST-NEXT: cmovoq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: movq %rdi, %rcx +; FAST-NEXT: 
addq %rax, %rcx
+; FAST-NEXT: cmovoq %rdi, %rax
 ; FAST-NEXT: retq
 ;
 ; KNL-LABEL: saddoselecti64:
 ; KNL: ## BB#0:
-; KNL-NEXT: movq %rdi, %rax
-; KNL-NEXT: addq %rsi, %rax
-; KNL-NEXT: cmovoq %rdi, %rsi
 ; KNL-NEXT: movq %rsi, %rax
+; KNL-NEXT: movq %rdi, %rcx
+; KNL-NEXT: addq %rax, %rcx
+; KNL-NEXT: cmovoq %rdi, %rax
 ; KNL-NEXT: retq
 %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
 %obit = extractvalue {i64, i1} %t, 1
@@ -779,26 +779,26 @@
 define i32 @uaddoselecti32(i32 %v1, i32 %v2) {
 ; SDAG-LABEL: uaddoselecti32:
 ; SDAG: ## BB#0:
-; SDAG-NEXT: movl %edi, %eax
-; SDAG-NEXT: addl %esi, %eax
-; SDAG-NEXT: cmovbl %edi, %esi
 ; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: movl %edi, %ecx
+; SDAG-NEXT: addl %eax, %ecx
+; SDAG-NEXT: cmovbl %edi, %eax
 ; SDAG-NEXT: retq
 ;
 ; FAST-LABEL: uaddoselecti32:
 ; FAST: ## BB#0:
-; FAST-NEXT: movl %edi, %eax
-; FAST-NEXT: addl %esi, %eax
-; FAST-NEXT: cmovbl %edi, %esi
 ; FAST-NEXT: movl %esi, %eax
+; FAST-NEXT: movl %edi, %ecx
+; FAST-NEXT: addl %eax, %ecx
+; FAST-NEXT: cmovbl %edi, %eax
 ; FAST-NEXT: retq
 ;
 ; KNL-LABEL: uaddoselecti32:
 ; KNL: ## BB#0:
-; KNL-NEXT: movl %edi, %eax
-; KNL-NEXT: addl %esi, %eax
-; KNL-NEXT: cmovbl %edi, %esi
 ; KNL-NEXT: movl %esi, %eax
+; KNL-NEXT: movl %edi, %ecx
+; KNL-NEXT: addl %eax, %ecx
+; KNL-NEXT: cmovbl %edi, %eax
 ; KNL-NEXT: retq
 %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
 %obit = extractvalue {i32, i1} %t, 1
@@ -809,26 +809,26 @@
 define i64 @uaddoselecti64(i64 %v1, i64 %v2) {
 ; SDAG-LABEL: uaddoselecti64:
 ; SDAG: ## BB#0:
-; SDAG-NEXT: movq %rdi, %rax
-; SDAG-NEXT: addq %rsi, %rax
-; SDAG-NEXT: cmovbq %rdi, %rsi
 ; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: movq %rdi, %rcx
+; SDAG-NEXT: addq %rax, %rcx
+; SDAG-NEXT: cmovbq %rdi, %rax
 ; SDAG-NEXT: retq
 ;
 ; FAST-LABEL: uaddoselecti64:
 ; FAST: ## BB#0:
-; FAST-NEXT: movq %rdi, %rax
-; FAST-NEXT: addq %rsi, %rax
-; FAST-NEXT: cmovbq %rdi, %rsi
 ; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: movq %rdi, %rcx
+; FAST-NEXT: addq %rax, %rcx
+; FAST-NEXT: cmovbq %rdi, %rax
 ; FAST-NEXT: retq
 ;
 ; KNL-LABEL: uaddoselecti64:
 ; KNL: ## BB#0:
-; KNL-NEXT: movq %rdi, %rax
-; KNL-NEXT: addq %rsi, %rax
-; KNL-NEXT: cmovbq %rdi, %rsi
 ; KNL-NEXT: movq %rsi, %rax
+; KNL-NEXT: movq %rdi, %rcx
+; KNL-NEXT: addq %rax, %rcx
+; KNL-NEXT: cmovbq %rdi, %rax
 ; KNL-NEXT: retq
 %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
 %obit = extractvalue {i64, i1} %t, 1
@@ -839,23 +839,23 @@
 define i32 @ssuboselecti32(i32 %v1, i32 %v2) {
 ; SDAG-LABEL: ssuboselecti32:
 ; SDAG: ## BB#0:
-; SDAG-NEXT: cmpl %esi, %edi
-; SDAG-NEXT: cmovol %edi, %esi
 ; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: cmpl %eax, %edi
+; SDAG-NEXT: cmovol %edi, %eax
 ; SDAG-NEXT: retq
 ;
 ; FAST-LABEL: ssuboselecti32:
 ; FAST: ## BB#0:
-; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: cmovol %edi, %esi
 ; FAST-NEXT: movl %esi, %eax
+; FAST-NEXT: cmpl %eax, %edi
+; FAST-NEXT: cmovol %edi, %eax
 ; FAST-NEXT: retq
 ;
 ; KNL-LABEL: ssuboselecti32:
 ; KNL: ## BB#0:
-; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: cmovol %edi, %esi
 ; KNL-NEXT: movl %esi, %eax
+; KNL-NEXT: cmpl %eax, %edi
+; KNL-NEXT: cmovol %edi, %eax
 ; KNL-NEXT: retq
 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
 %obit = extractvalue {i32, i1} %t, 1
@@ -866,23 +866,23 @@
 define i64 @ssuboselecti64(i64 %v1, i64 %v2) {
 ; SDAG-LABEL: ssuboselecti64:
 ; SDAG: ## BB#0:
-; SDAG-NEXT: cmpq %rsi, %rdi
-; SDAG-NEXT: cmovoq %rdi, %rsi
 ; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: cmpq %rax, %rdi
+; SDAG-NEXT: cmovoq %rdi, %rax
 ; SDAG-NEXT: retq
 ;
 ; FAST-LABEL: ssuboselecti64:
 ; FAST: ## BB#0:
-; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: cmovoq %rdi, %rsi
 ; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: cmpq %rax, %rdi
+; FAST-NEXT: cmovoq %rdi, %rax
 ; FAST-NEXT: retq
 ;
 ; KNL-LABEL: ssuboselecti64:
 ; KNL: ## BB#0:
-; KNL-NEXT: cmpq %rsi, %rdi
-; KNL-NEXT: cmovoq %rdi, %rsi
 ; KNL-NEXT: movq %rsi, %rax
+; KNL-NEXT: cmpq %rax, %rdi
+; KNL-NEXT: cmovoq %rdi, %rax
 ; KNL-NEXT: retq
 %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
 %obit = extractvalue {i64, i1} %t, 1
@@ -893,23 +893,23 @@
 define i32 @usuboselecti32(i32 %v1, i32 %v2) {
 ; SDAG-LABEL: usuboselecti32:
 ; SDAG: ## BB#0:
-; SDAG-NEXT: cmpl %esi, %edi
-; SDAG-NEXT: cmovbl %edi, %esi
 ; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: cmpl %eax, %edi
+; SDAG-NEXT: cmovbl %edi, %eax
 ; SDAG-NEXT: retq
 ;
 ; FAST-LABEL: usuboselecti32:
 ; FAST: ## BB#0:
-; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: cmovbl %edi, %esi
 ; FAST-NEXT: movl %esi, %eax
+; FAST-NEXT: cmpl %eax, %edi
+; FAST-NEXT: cmovbl %edi, %eax
 ; FAST-NEXT: retq
 ;
 ; KNL-LABEL: usuboselecti32:
 ; KNL: ## BB#0:
-; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: cmovbl %edi, %esi
 ; KNL-NEXT: movl %esi, %eax
+; KNL-NEXT: cmpl %eax, %edi
+; KNL-NEXT: cmovbl %edi, %eax
 ; KNL-NEXT: retq
 %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
 %obit = extractvalue {i32, i1} %t, 1
@@ -920,23 +920,23 @@
 define i64 @usuboselecti64(i64 %v1, i64 %v2) {
 ; SDAG-LABEL: usuboselecti64:
 ; SDAG: ## BB#0:
-; SDAG-NEXT: cmpq %rsi, %rdi
-; SDAG-NEXT: cmovbq %rdi, %rsi
 ; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: cmpq %rax, %rdi
+; SDAG-NEXT: cmovbq %rdi, %rax
 ; SDAG-NEXT: retq
 ;
 ; FAST-LABEL: usuboselecti64:
 ; FAST: ## BB#0:
-; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: cmovbq %rdi, %rsi
 ; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: cmpq %rax, %rdi
+; FAST-NEXT: cmovbq %rdi, %rax
 ; FAST-NEXT: retq
 ;
 ; KNL-LABEL: usuboselecti64:
 ; KNL: ## BB#0:
-; KNL-NEXT: cmpq %rsi, %rdi
-; KNL-NEXT: cmovbq %rdi, %rsi
 ; KNL-NEXT: movq %rsi, %rax
+; KNL-NEXT: cmpq %rax, %rdi
+; KNL-NEXT: cmovbq %rdi, %rax
 ; KNL-NEXT: retq
 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
 %obit = extractvalue {i64, i1} %t, 1
@@ -1372,23 +1372,23 @@
 define {i64, i1} @usuboovf(i64 %a, i64 %b) {
 ; SDAG-LABEL: usuboovf:
 ; SDAG: ## BB#0:
-; SDAG-NEXT: notq %rsi
-; SDAG-NEXT: xorl %edx, %edx
 ; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: notq %rax
+; SDAG-NEXT: xorl %edx, %edx
 ; SDAG-NEXT: retq
 ;
 ; FAST-LABEL: usuboovf:
 ; FAST: ## BB#0:
-; FAST-NEXT: notq %rsi
-; FAST-NEXT: xorl %edx, %edx
 ; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: notq %rax
+; FAST-NEXT: xorl %edx, %edx
 ; FAST-NEXT: retq
 ;
 ; KNL-LABEL: usuboovf:
 ; KNL: ## BB#0:
-; KNL-NEXT: notq %rsi
-; KNL-NEXT: xorl %edx, %edx
 ; KNL-NEXT: movq %rsi, %rax
+; KNL-NEXT: notq %rax
+; KNL-NEXT: xorl %edx, %edx
 ; KNL-NEXT: retq
 %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %a)
 %v0 = extractvalue {i64, i1} %t0, 0
Index: test/CodeGen/X86/xchg-nofold.ll
===================================================================
--- test/CodeGen/X86/xchg-nofold.ll
+++ test/CodeGen/X86/xchg-nofold.ll
@@ -9,20 +9,21 @@
 define zeroext i1 @_Z3fooRSt6atomicIbEb(%"struct.std::atomic"* nocapture dereferenceable(1) %a, i1 returned zeroext %b) nounwind {
 ; CHECK-LABEL: _Z3fooRSt6atomicIbEb:
 ; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: shrq $3, %rax
-; CHECK-NEXT: movb 2147450880(%rax), %al
-; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: movq %rdi, %rcx
+; CHECK-NEXT: shrq $3, %rcx
+; CHECK-NEXT: movb 2147450880(%rcx), %cl
+; CHECK-NEXT: testb %cl, %cl
 ; CHECK-NEXT: je .LBB0_3
 ; CHECK-NEXT: # BB#1:
-; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: andl $7, %ecx
-; CHECK-NEXT: cmpb %al, %cl
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: andl $7, %edx
+; CHECK-NEXT: cmpb %cl, %dl
 ; CHECK-NEXT: jge .LBB0_2
 ; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: xchgb %al, (%rdi)
-; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: xchgb %cl, (%rdi)
+; CHECK-NEXT: # kill: %AL %AL %EAX
 ; CHECK-NEXT: retq
 ; CHECK-NEXT: .LBB0_2:
 ; CHECK-NEXT: pushq %rax
Index: test/CodeGen/X86/xmulo.ll
===================================================================
--- test/CodeGen/X86/xmulo.ll
+++ test/CodeGen/X86/xmulo.ll
@@ -92,6 +92,7 @@
 ; SDAG-LABEL: smuloi8:
 ; SDAG: ## BB#0:
 ; SDAG-NEXT: movl %edi, %eax
+; SDAG-NEXT: ## kill: %AL %AL %EAX
 ; SDAG-NEXT: imulb %sil
 ; SDAG-NEXT: seto %cl
 ; SDAG-NEXT: movb %al, (%rdx)
@@ -101,6 +102,7 @@
 ; FAST-LABEL: smuloi8:
 ; FAST: ## BB#0:
 ; FAST-NEXT: movl %edi, %eax
+; FAST-NEXT: ## kill: %AL %AL %EAX
 ; FAST-NEXT: imulb %sil
 ; FAST-NEXT: seto %cl
 ; FAST-NEXT: movb %al, (%rdx)
@@ -111,6 +113,7 @@
 ; KNL-LABEL: smuloi8:
 ; KNL: ## BB#0:
 ; KNL-NEXT: movl %edi, %eax
+; KNL-NEXT: ## kill: %AL %AL %EAX
 ; KNL-NEXT: imulb %sil
 ; KNL-NEXT: seto %cl
 ; KNL-NEXT: movb %al, (%rdx)
@@ -218,6 +221,7 @@
 ; SDAG-LABEL: umuloi8:
 ; SDAG: ## BB#0:
 ; SDAG-NEXT: movl %edi, %eax
+; SDAG-NEXT: ## kill: %AL %AL %EAX
 ; SDAG-NEXT: mulb %sil
 ; SDAG-NEXT: seto %cl
 ; SDAG-NEXT: movb %al, (%rdx)
@@ -227,6 +231,7 @@
 ; FAST-LABEL: umuloi8:
 ; FAST: ## BB#0:
 ; FAST-NEXT: movl %edi, %eax
+; FAST-NEXT: ## kill: %AL %AL %EAX
 ; FAST-NEXT: mulb %sil
 ; FAST-NEXT: seto %cl
 ; FAST-NEXT: movb %al, (%rdx)
@@ -237,6 +242,7 @@
 ; KNL-LABEL: umuloi8:
 ; KNL: ## BB#0:
 ; KNL-NEXT: movl %edi, %eax
+; KNL-NEXT: ## kill: %AL %AL %EAX
 ; KNL-NEXT: mulb %sil
 ; KNL-NEXT: seto %cl
 ; KNL-NEXT: movb %al, (%rdx)
@@ -254,6 +260,7 @@
 ; SDAG: ## BB#0:
 ; SDAG-NEXT: movq %rdx, %rcx
 ; SDAG-NEXT: movl %edi, %eax
+; SDAG-NEXT: ## kill: %AX %AX %EAX
 ; SDAG-NEXT: mulw %si
 ; SDAG-NEXT: seto %dl
 ; SDAG-NEXT: movw %ax, (%rcx)
@@ -264,6 +271,7 @@
 ; FAST: ## BB#0:
 ; FAST-NEXT: movq %rdx, %rcx
 ; FAST-NEXT: movl %edi, %eax
+; FAST-NEXT: ## kill: %AX %AX %EAX
 ; FAST-NEXT: mulw %si
 ; FAST-NEXT: seto %dl
 ; FAST-NEXT: movw %ax, (%rcx)
@@ -275,6 +283,7 @@
 ; KNL: ## BB#0:
 ; KNL-NEXT: movq %rdx, %rcx
 ; KNL-NEXT: movl %edi, %eax
+; KNL-NEXT: ## kill: %AX %AX %EAX
 ; KNL-NEXT: mulw %si
 ; KNL-NEXT: seto %dl
 ; KNL-NEXT: movw %ax, (%rcx)
@@ -369,26 +378,26 @@
 define i32 @smuloselecti32(i32 %v1, i32 %v2) {
 ; SDAG-LABEL: smuloselecti32:
 ; SDAG: ## BB#0:
-; SDAG-NEXT: movl %edi, %eax
-; SDAG-NEXT: imull %esi, %eax
-; SDAG-NEXT: cmovol %edi, %esi
 ; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: movl %edi, %ecx
+; SDAG-NEXT: imull %eax, %ecx
+; SDAG-NEXT: cmovol %edi, %eax
 ; SDAG-NEXT: retq
 ;
 ; FAST-LABEL: smuloselecti32:
 ; FAST: ## BB#0:
-; FAST-NEXT: movl %edi, %eax
-; FAST-NEXT: imull %esi, %eax
-; FAST-NEXT: cmovol %edi, %esi
 ; FAST-NEXT: movl %esi, %eax
+; FAST-NEXT: movl %edi, %ecx
+; FAST-NEXT: imull %eax, %ecx
+; FAST-NEXT: cmovol %edi, %eax
 ; FAST-NEXT: retq
 ;
 ; KNL-LABEL: smuloselecti32:
 ; KNL: ## BB#0:
-; KNL-NEXT: movl %edi, %eax
-; KNL-NEXT: imull %esi, %eax
-; KNL-NEXT: cmovol %edi, %esi
 ; KNL-NEXT: movl %esi, %eax
+; KNL-NEXT: movl %edi, %ecx
+; KNL-NEXT: imull %eax, %ecx
+; KNL-NEXT: cmovol %edi, %eax
 ; KNL-NEXT: retq
 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
 %obit = extractvalue {i32, i1} %t, 1
@@ -399,26 +408,26 @@
 define i64 @smuloselecti64(i64 %v1, i64 %v2) {
 ; SDAG-LABEL: smuloselecti64:
 ; SDAG: ## BB#0:
-; SDAG-NEXT: movq %rdi, %rax
-; SDAG-NEXT: imulq %rsi, %rax
-; SDAG-NEXT: cmovoq %rdi, %rsi
 ; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: movq %rdi, %rcx
+; SDAG-NEXT: imulq %rax, %rcx
+; SDAG-NEXT: cmovoq %rdi, %rax
 ; SDAG-NEXT: retq
 ;
 ; FAST-LABEL: smuloselecti64:
 ; FAST: ## BB#0:
-; FAST-NEXT: movq %rdi, %rax
-; FAST-NEXT: imulq %rsi, %rax
-; FAST-NEXT: cmovoq %rdi, %rsi
 ; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: movq %rdi, %rcx
+; FAST-NEXT: imulq %rax, %rcx
+; FAST-NEXT: cmovoq %rdi, %rax
 ; FAST-NEXT: retq
 ;
 ; KNL-LABEL: smuloselecti64:
 ; KNL: ## BB#0:
-; KNL-NEXT: movq %rdi, %rax
-; KNL-NEXT: imulq %rsi, %rax
-; KNL-NEXT: cmovoq %rdi, %rsi
 ; KNL-NEXT: movq %rsi, %rax
+; KNL-NEXT: movq %rdi, %rcx
+; KNL-NEXT: imulq %rax, %rcx
+; KNL-NEXT: cmovoq %rdi, %rax
 ; KNL-NEXT: retq
 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
 %obit = extractvalue {i64, i1} %t, 1
@@ -694,8 +703,8 @@
 define i1 @bug27873(i64 %c1, i1 %c2) {
 ; SDAG-LABEL: bug27873:
 ; SDAG: ## BB#0:
-; SDAG-NEXT: movl $160, %ecx
 ; SDAG-NEXT: movq %rdi, %rax
+; SDAG-NEXT: movl $160, %ecx
 ; SDAG-NEXT: mulq %rcx
 ; SDAG-NEXT: seto %al
 ; SDAG-NEXT: orb %sil, %al
@@ -703,8 +712,8 @@
 ;
 ; FAST-LABEL: bug27873:
 ; FAST: ## BB#0:
-; FAST-NEXT: movl $160, %ecx
 ; FAST-NEXT: movq %rdi, %rax
+; FAST-NEXT: movl $160, %ecx
 ; FAST-NEXT: mulq %rcx
 ; FAST-NEXT: seto %al
 ; FAST-NEXT: orb %sil, %al
@@ -712,8 +721,8 @@
 ;
 ; KNL-LABEL: bug27873:
 ; KNL: ## BB#0:
-; KNL-NEXT: movl $160, %ecx
 ; KNL-NEXT: movq %rdi, %rax
+; KNL-NEXT: movl $160, %ecx
 ; KNL-NEXT: mulq %rcx
 ; KNL-NEXT: seto %al
 ; KNL-NEXT: orb %sil, %al
Index: test/CodeGen/XCore/byVal.ll
===================================================================
--- test/CodeGen/XCore/byVal.ll
+++ test/CodeGen/XCore/byVal.ll
@@ -38,13 +38,13 @@
 ; CHECK-LABEL: f2Test
 ; CHECK: extsp 4
 ; CHECK: stw lr, sp[1]
+; CHECK: mov r11, r1
 ; CHECK: stw r2, sp[3]
 ; CHECK: stw r3, sp[4]
 ; CHECK: ldw r0, r0[0]
 ; CHECK: stw r0, sp[2]
-; CHECK: ldaw r2, sp[2]
-; CHECK: mov r0, r1
-; CHECK: mov r1, r2
+; CHECK: ldaw r1, sp[2]
+; CHECK: mov r0, r11
 ; CHECK: bl f2
 ; CHECK: ldw lr, sp[1]
 ; CHECK: ldaw sp, sp[4]
Index: test/DebugInfo/COFF/pieces.ll
===================================================================
--- test/DebugInfo/COFF/pieces.ll
+++ test/DebugInfo/COFF/pieces.ll
@@ -64,15 +64,15 @@
 ; ASM-LABEL: pad_right: # @pad_right
-; ASM: #DEBUG_VALUE: pad_right:o <- [DW_OP_LLVM_fragment 32 32] %ECX
-; ASM: movl %ecx, %eax
+; ASM: movq %rcx, %rax
+; ASM: #DEBUG_VALUE: pad_right:o <- [DW_OP_LLVM_fragment 32 32] %EAX
 ; ASM: retq
 ; ASM-LABEL: pad_left: # @pad_left
-; ASM: #DEBUG_VALUE: pad_left:o <- [DW_OP_LLVM_fragment 0 32] %ECX
 ; ASM: .cv_loc 2 1 24 3 # t.c:24:3
-; ASM: movl %ecx, %eax
+; ASM: movq %rcx, %rax
+; ASM: #DEBUG_VALUE: pad_left:o <- [DW_OP_LLVM_fragment 0 32] %EAX
 ; ASM: retq
@@ -133,7 +133,7 @@
 ; ASM: .asciz "pad_right" # Function name
 ; ASM: .short 4414 # Record kind: S_LOCAL
 ; ASM: .asciz "o"
-; ASM: .cv_def_range .Lfunc_begin1 .Lfunc_end1, "C\021\022\000\000\000\004\000\000\000"
+; ASM: .cv_def_range .Lfunc_begin1 .Ltmp8, "C\021\021\000\000\000\004\000\000\000"
 ; OBJ-LABEL: {{.*}}Proc{{.*}}Sym {
 ; OBJ: Kind: S_GPROC32_ID (0x1147)
@@ -143,7 +143,7 @@
 ; OBJ: VarName: o
 ; OBJ: }
 ; OBJ: DefRangeSubfieldRegisterSym {
-; OBJ: Register: ECX (0x12)
+; OBJ: Register: EAX (0x11)
 ; OBJ: MayHaveNoName: 0
 ; OBJ: OffsetInParent: 4
 ; OBJ: LocalVariableAddrRange {
@@ -156,7 +156,7 @@
 ; ASM: .asciz "pad_left" # Function name
 ; ASM: .short 4414 # Record kind: S_LOCAL
 ; ASM: .asciz "o"
-; ASM: .cv_def_range .Lfunc_begin2 .Lfunc_end2, "C\021\022\000\000\000\000\000\000\000"
+; ASM: .cv_def_range .Lfunc_begin2 .Ltmp10, "C\021\021\000\000\000\000\000\000\000"
 ; OBJ-LABEL: {{.*}}Proc{{.*}}Sym {
 ; OBJ: Kind: S_GPROC32_ID (0x1147)
@@ -166,7 +166,7 @@
 ; OBJ: VarName: o
 ; OBJ: }
 ; OBJ: DefRangeSubfieldRegisterSym {
-; OBJ: Register: ECX (0x12)
+; OBJ: Register: EAX (0x11)
 ; OBJ: MayHaveNoName: 0
 ; OBJ: OffsetInParent: 0
 ; OBJ: LocalVariableAddrRange {
Index: test/DebugInfo/X86/live-debug-values.ll
===================================================================
--- test/DebugInfo/X86/live-debug-values.ll
+++ test/DebugInfo/X86/live-debug-values.ll
@@ -29,11 +29,11 @@
 ; DBG_VALUE for variable "n" is extended into BB#5 from its predecessors BB#3
 ; and BB#4.
+; CHECK: movl %eax, %esi
 ; CHECK: .LBB0_5:
 ; CHECK-NEXT: #DEBUG_VALUE: main:n <- %EBX
 ; Other register values have been clobbered.
 ; CHECK-NOT: #DEBUG_VALUE:
-; CHECK: movl %ecx, m(%rip)
 ; ModuleID = 'LiveDebugValues.c'
 source_filename = "test/DebugInfo/X86/live-debug-values.ll"
Index: test/DebugInfo/X86/live-debug-variables.ll
===================================================================
--- test/DebugInfo/X86/live-debug-variables.ll
+++ test/DebugInfo/X86/live-debug-variables.ll
@@ -24,8 +24,10 @@
 ; CHECK: .debug_loc contents:
 ; CHECK-NEXT: 0x00000000:
+
 ; We currently emit an entry for the function prologue, too, which could be optimized away.
-; CHECK: 0x000000000000001f - 0x000000000000003c: DW_OP_reg3 RBX
+; CHECK: 0x000000000000000b - 0x0000000000000018: DW_OP_reg2 RCX
+; CHECK: 0x0000000000000018 - 0x0000000000000072: DW_OP_reg3 RBX
 ; We should only have one entry inside the function.
 ; CHECK-NOT: :
Index: test/DebugInfo/X86/pieces-3.ll
===================================================================
--- test/DebugInfo/X86/pieces-3.ll
+++ test/DebugInfo/X86/pieces-3.ll
@@ -17,11 +17,12 @@
 ;
 ; CHECK: DW_TAG_formal_parameter [3]
 ; CHECK-NEXT: DW_AT_location [DW_FORM_data4] (
-; CHECK-NEXT: 0x0000000000000000 - 0x0000000000000004: DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg4 RSI, DW_OP_piece 0x4
-; CHECK-NEXT: 0x0000000000000004 - 0x0000000000000008: DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg4 RSI, DW_OP_piece 0x4)
+; CHECK-NEXT: 0x0000000000000000 - 0x0000000000000007: DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg4
+; CHECK-NEXT: 0x0000000000000007 - 0x0000000000000007: DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg0
 ; CHECK-NEXT: DW_AT_name {{.*}}"outer"
 ; CHECK: DW_TAG_variable
-; CHECK-NEXT: DW_AT_location {{.*}}(DW_OP_reg4 RSI, DW_OP_piece 0x4)
+; CHECK-NEXT: DW_AT_location [DW_FORM_data4] (0x00000044
+; CHECK-NEXT: 0x0000000000000007 - 0x0000000000000007: DW_OP_reg0 RAX, DW_OP_piece 0x4)
 ; CHECK-NEXT: "i1"
 ; ModuleID = '/Volumes/Data/llvm/test/DebugInfo/X86/sroasplit-2.ll'