Index: include/llvm/CodeGen/MachineRegisterInfo.h =================================================================== --- include/llvm/CodeGen/MachineRegisterInfo.h +++ include/llvm/CodeGen/MachineRegisterInfo.h @@ -84,14 +84,15 @@ /// all registers that were disabled are removed from the list. SmallVector UpdatedCSRs; - /// RegAllocHints - This vector records register allocation hints for virtual - /// registers. For each virtual register, it keeps a register and hint type - /// pair making up the allocation hint. Hint type is target specific except - /// for the value 0 which means the second value of the pair is the preferred - /// register for allocation. For example, if the hint is <0, 1024>, it means - /// the allocator should prefer the physical register allocated to the virtual - /// register of the hint. - IndexedMap, VirtReg2IndexFunctor> RegAllocHints; + /// RegAllocHints - This vector records register allocation hints for + /// virtual registers. For each virtual register, it keeps a pair of hint + /// type and hints vector making up the allocation hints. Only the first + /// hint may be target specific, and in that case this is reflected by the + /// first member of the pair being non-zero. If the hinted register is + /// virtual, it means the allocator should prefer the physical register + /// allocated to it if any. + IndexedMap>, + VirtReg2IndexFunctor> RegAllocHints; /// PhysRegUseDefLists - This is an array of the head of the use/def list for /// physical registers. @@ -702,35 +703,55 @@ void clearVirtRegs(); /// setRegAllocationHint - Specify a register allocation hint for the - /// specified virtual register. + /// specified virtual register. This is typically used by target, and in case + /// of an earlier hint it will be overwritten. 
void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) { assert(TargetRegisterInfo::isVirtualRegister(VReg)); RegAllocHints[VReg].first = Type; - RegAllocHints[VReg].second = PrefReg; + RegAllocHints[VReg].second.clear(); + RegAllocHints[VReg].second.push_back(PrefReg); } - /// Specify the preferred register allocation hint for the specified virtual - /// register. + /// addRegAllocationHint - Add a register allocation hint to the hints + /// vector for VReg. + void addRegAllocationHint(unsigned VReg, unsigned PrefReg) { + assert(TargetRegisterInfo::isVirtualRegister(VReg)); + RegAllocHints[VReg].second.push_back(PrefReg); + } + + /// Specify the preferred (target independent) register allocation hint for + /// the specified virtual register. void setSimpleHint(unsigned VReg, unsigned PrefReg) { setRegAllocationHint(VReg, /*Type=*/0, PrefReg); } /// getRegAllocationHint - Return the register allocation hint for the - /// specified virtual register. + /// specified virtual register. If there are many hints, this returns the + /// first one in the hints vector. std::pair<unsigned, unsigned> getRegAllocationHint(unsigned VReg) const { assert(TargetRegisterInfo::isVirtualRegister(VReg)); - return RegAllocHints[VReg]; + unsigned BestHint = (RegAllocHints[VReg].second.size() ? + RegAllocHints[VReg].second[0] : 0); + return std::pair<unsigned, unsigned>(RegAllocHints[VReg].first, BestHint); } - /// getSimpleHint - Return the preferred register allocation hint, or 0 if a - /// standard simple hint (Type == 0) is not set. + /// getSimpleHint - same as getRegAllocationHint except it will only return + /// a target independent hint. unsigned getSimpleHint(unsigned VReg) const { assert(TargetRegisterInfo::isVirtualRegister(VReg)); std::pair<unsigned, unsigned> Hint = getRegAllocationHint(VReg); return Hint.first ? 0 : Hint.second; } + /// getRegAllocationHints - Return a reference to the vector of all + /// register allocation hints for VReg. 
+ const std::pair> + &getRegAllocationHints(unsigned VReg) const { + assert(TargetRegisterInfo::isVirtualRegister(VReg)); + return RegAllocHints[VReg]; + } + /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the /// specified register as undefined which causes the DBG_VALUE to be /// deleted during LiveDebugVariables analysis. Index: include/llvm/Target/TargetRegisterInfo.h =================================================================== --- include/llvm/Target/TargetRegisterInfo.h +++ include/llvm/Target/TargetRegisterInfo.h @@ -784,11 +784,10 @@ /// as returned from RegisterClassInfo::getOrder(). The hint registers must /// come from Order, and they must not be reserved. /// - /// The default implementation of this function can resolve - /// target-independent hints provided to MRI::setRegAllocationHint with - /// HintType == 0. Targets that override this function should defer to the - /// default implementation if they have no reason to change the allocation - /// order for VirtReg. There may be target-independent hints. + /// The default implementation of this function will only add target + /// independent register allocation hints. Targets that override this + /// function should typically call this default implementation as well and + /// expect to see generic copy hints added. virtual void getRegAllocationHints(unsigned VirtReg, ArrayRef Order, SmallVectorImpl &Hints, Index: lib/CodeGen/CalcSpillWeights.cpp =================================================================== --- lib/CodeGen/CalcSpillWeights.cpp +++ lib/CodeGen/CalcSpillWeights.cpp @@ -69,14 +69,16 @@ if (TargetRegisterInfo::isVirtualRegister(hreg)) return sub == hsub ? hreg : 0; + unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg); const TargetRegisterClass *rc = mri.getRegClass(reg); + if (rc->contains(CopiedPReg)) + return CopiedPReg; - // Only allow physreg hints in rc. - if (sub == 0) - return rc->contains(hreg) ? 
hreg : 0; + // Check if reg:sub matches so that a super register could be hinted. + if (sub) + return tri.getMatchingSuperReg(CopiedPReg, sub, rc); - // reg:sub should match the physreg hreg. - return tri.getMatchingSuperReg(hreg, sub, rc); + return 0; } // Check if all values in LI are rematerializable @@ -144,16 +146,27 @@ unsigned numInstr = 0; // Number of instructions using li SmallPtrSet visited; - // Find the best physreg hint and the best virtreg hint. - float bestPhys = 0, bestVirt = 0; - unsigned hintPhys = 0, hintVirt = 0; - - // Don't recompute a target specific hint. - bool noHint = mri.getRegAllocationHint(li.reg).first != 0; - // Don't recompute spill weight for an unspillable register. bool Spillable = li.isSpillable(); + // CopyHint is a sortable hint derived from a COPY instruction. + struct CopyHint { + unsigned Reg; + float Weight; + bool IsPhys; + CopyHint(unsigned R, float W, bool P) : Reg(R), Weight(W), IsPhys(P) {} + bool operator<(const CopyHint &rhs) const { + // Always prefer any physreg hint. + if (IsPhys != rhs.IsPhys) + return (IsPhys && !rhs.IsPhys); + if (Weight != rhs.Weight) + return (Weight > rhs.Weight); + // (just for the purpose of maintaining the set) + return Reg < rhs.Reg; + } + }; + + std::set CopyHints; for (MachineRegisterInfo::reg_instr_iterator I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); I != E; ) { @@ -186,7 +199,7 @@ } // Get allocation hints from copies. - if (noHint || !mi->isCopy()) + if (!mi->isCopy()) continue; unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) @@ -196,27 +209,23 @@ // // FIXME: we probably shouldn't use floats at all. 
volatile float hweight = Hint[hint] += weight; - if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && mri.isAllocatable(hint)) { - bestPhys = hweight; - hintPhys = hint; - } - } else { - if (hweight > bestVirt) { - bestVirt = hweight; - hintVirt = hint; - } - } + CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint))); } Hint.clear(); - // Always prefer the physreg hint. - if (unsigned hint = hintPhys ? hintPhys : hintVirt) { - mri.setRegAllocationHint(li.reg, 0, hint); + // Pass all the sorted copy hints to mri. + std::pair TargetHint = mri.getRegAllocationHint(li.reg); + for (auto &Hint : CopyHints) { + if (Hint.Reg == TargetHint.second) + // Don't add again the target hint. + continue; + mri.addRegAllocationHint(li.reg, Hint.Reg); + } + + if (CopyHints.size()) // Weakly boost the spill weight of hinted registers. totalWeight *= 1.01F; - } // If the live interval was already unspillable, leave it that way. if (!Spillable) Index: lib/CodeGen/TargetRegisterInfo.cpp =================================================================== --- lib/CodeGen/TargetRegisterInfo.cpp +++ lib/CodeGen/TargetRegisterInfo.cpp @@ -368,31 +368,36 @@ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); - std::pair Hint = MRI.getRegAllocationHint(VirtReg); - - // Hints with HintType != 0 were set by target-dependent code. - // Such targets must provide their own implementation of - // TRI::getRegAllocationHints to interpret those hint types. - assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints"); - - // Target-independent hints are either a physical or a virtual register. - unsigned Phys = Hint.second; - if (VRM && isVirtualRegister(Phys)) - Phys = VRM->getPhys(Phys); - - // Check that Phys is a valid hint in VirtReg's register class. - if (!isPhysicalRegister(Phys)) - return; - if (MRI.isReserved(Phys)) - return; - // Check that Phys is in the allocation order. 
We shouldn't heed hints - // from VirtReg's register class if they aren't in the allocation order. The - // target probably has a reason for removing the register. - if (!is_contained(Order, Phys)) - return; - - // All clear, tell the register allocator to prefer this register. - Hints.push_back(Phys); + const std::pair> &Hints_MRI = + MRI.getRegAllocationHints(VirtReg); + + // First hint may be a target hint. + bool Skip = (Hints_MRI.first != 0); + for (auto Reg : Hints_MRI.second) { + if (Skip) { + Skip = false; + continue; + } + + // Target-independent hints are either a physical or a virtual register. + unsigned Phys = Reg; + if (VRM && isVirtualRegister(Phys)) + Phys = VRM->getPhys(Phys); + + // Check that Phys is a valid hint in VirtReg's register class. + if (!isPhysicalRegister(Phys)) + continue; + if (MRI.isReserved(Phys)) + continue; + // Check that Phys is in the allocation order. We shouldn't heed hints + // from VirtReg's register class if they aren't in the allocation order. The + // target probably has a reason for removing the register. + if (!is_contained(Order, Phys)) + continue; + + // All clear, tell the register allocator to prefer this register. 
+ Hints.push_back(Phys); + } } bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { Index: test/CodeGen/AArch64/arm64-aapcs.ll =================================================================== --- test/CodeGen/AArch64/arm64-aapcs.ll +++ test/CodeGen/AArch64/arm64-aapcs.ll @@ -5,20 +5,20 @@ ; CHECK-LABEL: @test_i128_align define i128 @test_i128_align(i32, i128 %arg, i32 %after) { store i32 %after, i32* @var, align 4 -; CHECK: str w4, [{{x[0-9]+}}, :lo12:var] +; CHECK-DAG: str w4, [{{x[0-9]+}}, :lo12:var] ret i128 %arg -; CHECK: mov x0, x2 -; CHECK: mov x1, x3 +; CHECK-DAG: mov x0, x2 +; CHECK-DAG: mov x1, x3 } ; CHECK-LABEL: @test_i64x2_align define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) { store i32 %after, i32* @var, align 4 -; CHECK: str w3, [{{x[0-9]+}}, :lo12:var] +; CHECK-DAG: str w3, [{{x[0-9]+}}, :lo12:var] ret [2 x i64] %arg -; CHECK: mov x0, x1 +; CHECK-DAG: mov x0, x1 ; CHECK: mov x1, x2 } Index: test/CodeGen/AArch64/func-argpassing.ll =================================================================== --- test/CodeGen/AArch64/func-argpassing.ll +++ test/CodeGen/AArch64/func-argpassing.ll @@ -164,11 +164,11 @@ define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) { ; CHECK-LABEL: check_i128_regalign store i128 %val1, i128* @var128 -; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 +; CHECK-DAG: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 ; CHECK-DAG: stp x2, x3, [x[[VAR128]]] ret i64 %val2 -; CHECK: mov x0, x4 +; CHECK-DAG: mov x0, x4 } define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, Index: test/CodeGen/AArch64/swifterror.ll =================================================================== --- test/CodeGen/AArch64/swifterror.ll +++ test/CodeGen/AArch64/swifterror.ll @@ -40,11 +40,11 @@ ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE: mov x0, x21 +; CHECK-APPLE: cbnz 
x0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller: @@ -263,11 +263,11 @@ ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo_sret -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE: mov x0, x21 +; CHECK-APPLE: cbnz x0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller3: @@ -358,11 +358,11 @@ ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo_vararg -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE: mov x0, x21 +; CHECK-APPLE: cbnz x0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free entry: %error_ptr_ref = alloca swifterror %swift_error* Index: test/CodeGen/AArch64/win64_vararg.ll =================================================================== --- test/CodeGen/AArch64/win64_vararg.ll +++ test/CodeGen/AArch64/win64_vararg.ll @@ -161,25 +161,25 @@ ; CHECK: add x8, x8, #15 ; CHECK: mov x9, sp ; CHECK: and x8, x8, #0x1fffffff0 -; CHECK: sub x20, x9, x8 +; CHECK: sub [[REG:x[0-9]+]], x9, x8 ; CHECK: mov x19, x1 -; CHECK: mov x23, sp +; CHECK: mov [[REG2:x[0-9]+]], sp ; CHECK: stp x6, x7, [x29, #48] ; CHECK: stp x4, x5, [x29, #32] ; CHECK: stp x2, x3, [x29, #16] -; CHECK: mov sp, x20 -; CHECK: ldur x21, [x29, #-40] -; CHECK: sxtw x22, w0 +; CHECK: mov sp, [[REG]] +; CHECK: ldur [[REG3:x[0-9]+]], [x29, #-40] +; CHECK: sxtw [[REG4:x[0-9]+]], w0 ; CHECK: bl 
__local_stdio_printf_options ; CHECK: ldr x8, [x0] -; CHECK: mov x1, x20 -; CHECK: mov x2, x22 +; CHECK: mov x1, [[REG]] +; CHECK: mov x2, [[REG4]] ; CHECK: mov x3, x19 ; CHECK: orr x0, x8, #0x2 ; CHECK: mov x4, xzr -; CHECK: mov x5, x21 +; CHECK: mov x5, [[REG3]] ; CHECK: bl __stdio_common_vsprintf -; CHECK: mov sp, x23 +; CHECK: mov sp, [[REG2]] ; CHECK: sub sp, x29, #48 ; CHECK: ldp x29, x30, [sp, #48] ; CHECK: ldp x20, x19, [sp, #32] @@ -255,17 +255,15 @@ ; CHECK-LABEL: fixed_params ; CHECK: sub sp, sp, #32 -; CHECK: mov w8, w3 -; CHECK: mov w9, w2 -; CHECK: mov w10, w1 +; CHECK-DAG: mov w6, w3 +; CHECK-DAG: mov [[REG1:w[0-9]+]], w2 +; CHECK: mov w2, w1 ; CHECK: str w4, [sp] ; CHECK: fmov x1, d0 ; CHECK: fmov x3, d1 ; CHECK: fmov x5, d2 ; CHECK: fmov x7, d3 -; CHECK: mov w2, w10 -; CHECK: mov w4, w9 -; CHECK: mov w6, w8 +; CHECK: mov w4, [[REG1]] ; CHECK: str x30, [sp, #16] ; CHECK: str d4, [sp, #8] ; CHECK: bl varargs Index: test/CodeGen/AMDGPU/callee-special-input-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -208,8 +208,8 @@ ; GCN: enable_sgpr_workgroup_id_z = 0 ; GCN: s_mov_b32 s33, s8 -; GCN: s_mov_b32 s4, s33 -; GCN: s_mov_b32 s6, s7 +; GCN-DAG: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_mov_b32 s32, s33 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 { @@ -223,8 +223,8 @@ ; GCN: enable_sgpr_workgroup_id_z = 1 ; GCN: s_mov_b32 s33, s8 -; GCN: s_mov_b32 s4, s33 -; GCN: s_mov_b32 s6, s7 +; GCN-DAG: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 { call void @use_workgroup_id_z() @@ -396,7 +396,7 @@ ; GCN-DAG: s_mov_b32 s33, s8 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s6, s7 ; GCN-DAG: s_mov_b32 s32, s33 ; GCN: 
s_swappc_b64 @@ -412,7 +412,7 @@ ; GCN: s_mov_b32 s33, s8 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_mov_b32 s32, s33 Index: test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -220,8 +220,8 @@ ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z: ; GCN: enable_vgpr_workitem_id = 2 -; GCN: v_mov_b32_e32 v0, 0x22b -; GCN: v_mov_b32_e32 v1, v2 +; GCN-DAG: v_mov_b32_e32 v0, 0x22b +; GCN-DAG: v_mov_b32_e32 v1, v2 ; GCN: s_swappc_b64 ; GCN-NOT: v0 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { Index: test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll @@ -41,7 +41,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_oeq: -; GCN: v_cmp_eq_f32_e64 +; GCN: v_cmp_eq_f32_e32 define amdgpu_kernel void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1) store i64 %result, i64 addrspace(1)* %out @@ -49,7 +49,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_one: -; GCN: v_cmp_neq_f32_e64 +; GCN: v_cmp_neq_f32_e32 define amdgpu_kernel void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6) store i64 %result, i64 addrspace(1)* %out @@ -57,7 +57,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ogt: -; GCN: v_cmp_gt_f32_e64 +; GCN: v_cmp_gt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2) store i64 %result, i64 addrspace(1)* %out @@ -65,7 +65,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_oge: -; GCN: v_cmp_ge_f32_e64 +; GCN: 
v_cmp_ge_f32_e32 define amdgpu_kernel void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3) store i64 %result, i64 addrspace(1)* %out @@ -73,7 +73,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_olt: -; GCN: v_cmp_lt_f32_e64 +; GCN: v_cmp_lt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4) store i64 %result, i64 addrspace(1)* %out @@ -81,7 +81,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ole: -; GCN: v_cmp_le_f32_e64 +; GCN: v_cmp_le_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5) store i64 %result, i64 addrspace(1)* %out @@ -90,7 +90,7 @@ ; GCN-LABEL: {{^}}v_fcmp_f32_ueq: -; GCN: v_cmp_nlg_f32_e64 +; GCN: v_cmp_nlg_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9) store i64 %result, i64 addrspace(1)* %out @@ -98,7 +98,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_une: -; GCN: v_cmp_neq_f32_e64 +; GCN: v_cmp_neq_f32_e32 define amdgpu_kernel void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14) store i64 %result, i64 addrspace(1)* %out @@ -106,7 +106,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ugt: -; GCN: v_cmp_nle_f32_e64 +; GCN: v_cmp_nle_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10) store i64 %result, i64 addrspace(1)* %out @@ -114,7 +114,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_uge: -; GCN: v_cmp_nlt_f32_e64 +; GCN: v_cmp_nlt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11) store i64 
%result, i64 addrspace(1)* %out @@ -122,7 +122,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ult: -; GCN: v_cmp_nge_f32_e64 +; GCN: v_cmp_nge_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12) store i64 %result, i64 addrspace(1)* %out @@ -130,7 +130,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ule: -; GCN: v_cmp_ngt_f32_e64 +; GCN: v_cmp_ngt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13) store i64 %result, i64 addrspace(1)* %out @@ -138,7 +138,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_oeq: -; GCN: v_cmp_eq_f64_e64 +; GCN: v_cmp_eq_f64_e32 define amdgpu_kernel void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1) store i64 %result, i64 addrspace(1)* %out @@ -146,7 +146,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_one: -; GCN: v_cmp_neq_f64_e64 +; GCN: v_cmp_neq_f64_e32 define amdgpu_kernel void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6) store i64 %result, i64 addrspace(1)* %out @@ -154,7 +154,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ogt: -; GCN: v_cmp_gt_f64_e64 +; GCN: v_cmp_gt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2) store i64 %result, i64 addrspace(1)* %out @@ -162,7 +162,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_oge: -; GCN: v_cmp_ge_f64_e64 +; GCN: v_cmp_ge_f64_e32 define amdgpu_kernel void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3) store i64 %result, i64 addrspace(1)* %out @@ -170,7 +170,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_olt: -; GCN: v_cmp_lt_f64_e64 +; GCN: v_cmp_lt_f64_e32 define amdgpu_kernel void 
@v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4) store i64 %result, i64 addrspace(1)* %out @@ -178,7 +178,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ole: -; GCN: v_cmp_le_f64_e64 +; GCN: v_cmp_le_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5) store i64 %result, i64 addrspace(1)* %out @@ -186,7 +186,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ueq: -; GCN: v_cmp_nlg_f64_e64 +; GCN: v_cmp_nlg_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9) store i64 %result, i64 addrspace(1)* %out @@ -194,7 +194,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_une: -; GCN: v_cmp_neq_f64_e64 +; GCN: v_cmp_neq_f64_e32 define amdgpu_kernel void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14) store i64 %result, i64 addrspace(1)* %out @@ -202,7 +202,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ugt: -; GCN: v_cmp_nle_f64_e64 +; GCN: v_cmp_nle_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10) store i64 %result, i64 addrspace(1)* %out @@ -210,7 +210,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_uge: -; GCN: v_cmp_nlt_f64_e64 +; GCN: v_cmp_nlt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11) store i64 %result, i64 addrspace(1)* %out @@ -218,7 +218,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ult: -; GCN: v_cmp_nge_f64_e64 +; GCN: v_cmp_nge_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12) store i64 %result, i64 
addrspace(1)* %out @@ -226,7 +226,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ule: -; GCN: v_cmp_ngt_f64_e64 +; GCN: v_cmp_ngt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13) store i64 %result, i64 addrspace(1)* %out Index: test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll @@ -14,7 +14,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_eq: -; GCN: v_cmp_eq_u32_e64 +; GCN: v_cmp_eq_u32_e32 define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32) store i64 %result, i64 addrspace(1)* %out @@ -29,7 +29,7 @@ ret void } ; GCN-LABEL: {{^}}v_icmp_i32_ne: -; GCN: v_cmp_ne_u32_e64 +; GCN: v_cmp_ne_u32_e32 define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33) store i64 %result, i64 addrspace(1)* %out @@ -37,7 +37,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_ugt: -; GCN: v_cmp_gt_u32_e64 +; GCN: v_cmp_gt_u32_e32 define amdgpu_kernel void @v_icmp_u32_ugt(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34) store i64 %result, i64 addrspace(1)* %out @@ -45,7 +45,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_uge: -; GCN: v_cmp_ge_u32_e64 +; GCN: v_cmp_ge_u32_e32 define amdgpu_kernel void @v_icmp_u32_uge(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35) store i64 %result, i64 addrspace(1)* %out @@ -53,7 +53,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_ult: -; GCN: v_cmp_lt_u32_e64 +; GCN: v_cmp_lt_u32_e32 define amdgpu_kernel void @v_icmp_u32_ult(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36) store i64 %result, i64 addrspace(1)* %out @@ 
-61,7 +61,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_ule: -; GCN: v_cmp_le_u32_e64 +; GCN: v_cmp_le_u32_e32 define amdgpu_kernel void @v_icmp_u32_ule(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37) store i64 %result, i64 addrspace(1)* %out @@ -69,7 +69,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_sgt: -; GCN: v_cmp_gt_i32_e64 +; GCN: v_cmp_gt_i32_e32 define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38) store i64 %result, i64 addrspace(1)* %out @@ -77,7 +77,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_sge: -; GCN: v_cmp_ge_i32_e64 +; GCN: v_cmp_ge_i32_e32 define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39) store i64 %result, i64 addrspace(1)* %out @@ -85,14 +85,14 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_slt: -; GCN: v_cmp_lt_i32_e64 +; GCN: v_cmp_lt_i32_e32 define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40) store i64 %result, i64 addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}v_icmp_i32_sle: -; GCN: v_cmp_le_i32_e64 +; GCN: v_cmp_le_i32_e32 define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41) store i64 %result, i64 addrspace(1)* %out @@ -100,7 +100,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_eq: -; GCN: v_cmp_eq_u64_e64 +; GCN: v_cmp_eq_u64_e32 define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32) store i64 %result, i64 addrspace(1)* %out @@ -108,7 +108,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_ne: -; GCN: v_cmp_ne_u64_e64 +; GCN: v_cmp_ne_u64_e32 define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33) 
store i64 %result, i64 addrspace(1)* %out @@ -116,7 +116,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_ugt: -; GCN: v_cmp_gt_u64_e64 +; GCN: v_cmp_gt_u64_e32 define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34) store i64 %result, i64 addrspace(1)* %out @@ -124,7 +124,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_uge: -; GCN: v_cmp_ge_u64_e64 +; GCN: v_cmp_ge_u64_e32 define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35) store i64 %result, i64 addrspace(1)* %out @@ -132,7 +132,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_ult: -; GCN: v_cmp_lt_u64_e64 +; GCN: v_cmp_lt_u64_e32 define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36) store i64 %result, i64 addrspace(1)* %out @@ -140,7 +140,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_ule: -; GCN: v_cmp_le_u64_e64 +; GCN: v_cmp_le_u64_e32 define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37) store i64 %result, i64 addrspace(1)* %out @@ -148,7 +148,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_sgt: -; GCN: v_cmp_gt_i64_e64 +; GCN: v_cmp_gt_i64_e32 define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38) store i64 %result, i64 addrspace(1)* %out @@ -156,7 +156,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_sge: -; GCN: v_cmp_ge_i64_e64 +; GCN: v_cmp_ge_i64_e32 define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39) store i64 %result, i64 addrspace(1)* %out @@ -164,14 +164,14 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_slt: -; GCN: v_cmp_lt_i64_e64 +; GCN: v_cmp_lt_i64_e32 define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) { %result 
= call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40) store i64 %result, i64 addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}v_icmp_i64_sle: -; GCN: v_cmp_le_i64_e64 +; GCN: v_cmp_le_i64_e32 define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41) store i64 %result, i64 addrspace(1)* %out Index: test/CodeGen/AMDGPU/ret.ll =================================================================== --- test/CodeGen/AMDGPU/ret.ll +++ test/CodeGen/AMDGPU/ret.ll @@ -126,9 +126,9 @@ ; GCN-LABEL: {{^}}vgpr_ps_addr119: ; GCN-DAG: v_mov_b32_e32 v0, v2 ; GCN-DAG: v_mov_b32_e32 v1, v3 -; GCN: v_mov_b32_e32 v2, v6 -; GCN: v_mov_b32_e32 v3, v8 -; GCN: v_mov_b32_e32 v4, v12 +; GCN-DAG: v_mov_b32_e32 v2, v6 +; GCN-DAG: v_mov_b32_e32 v3, v8 +; GCN-DAG: v_mov_b32_e32 v4, v12 ; GCN-NOT: s_endpgm define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 { bb: @@ -178,8 +178,8 @@ } ; GCN-LABEL: {{^}}sgpr: -; GCN: s_add_i32 s0, s3, 2 ; GCN: s_mov_b32 s2, s3 +; GCN: s_add_i32 s0, s2, 2 ; GCN-NOT: s_endpgm define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: Index: test/CodeGen/AMDGPU/sgpr-control-flow.ll =================================================================== --- test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -77,8 +77,8 @@ ; SI-LABEL: {{^}}sgpr_if_else_valu_br: ; SI: s_add_i32 [[SGPR:s[0-9]+]] -; SI-NOT: s_add_i32 [[SGPR]] - +; SI: s_add_i32 [[SGPR]] +; NOTE: this is currently failing as the last check should actually be -NOT. 
define amdgpu_kernel void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 Index: test/CodeGen/ARM/longMAC.ll =================================================================== --- test/CodeGen/ARM/longMAC.ll +++ test/CodeGen/ARM/longMAC.ll @@ -337,20 +337,20 @@ @global_b = external global i16, align 2 ;CHECK-LABEL: MACLongTest15 ;CHECK-T2-DSP-NOT: {{asr|lsr}} -;CHECK-T2-DSP: smlaltb r2, r3, r0, r1 +;CHECK-T2-DSP: mov r1, r3 +;CHECK-T2-DSP: smlaltb r2, r1, r0, r3 ;CHECK-T2-DSP-NEXT: mov r0, r2 -;CHECK-T2-DSP-NEXT: mov r1, r3 ;CHECK-V5TE-NOT: {{asr|lsr}} -;CHECK-V5TE: smlaltb r2, r3, r0, r1 +;CHECK-V5TE: mov r1, r3 +;CHECK-V5TE: smlaltb r2, r1, r0, r3 ;CHECK-V5TE-NEXT: mov r0, r2 -;CHECK-V5TE-NEXT: mov r1, r3 ;CHECK-V7-LE-NOT: {{asr|lsr}} -;CHECK-V7-LE: smlaltb r2, r3, r0, r1 +;CHECK-V7-LE: mov r1, r3 +;CHECK-V7-LE: smlaltb r2, r1, r0, r3 ;CHECK-V7-LE-NEXT: mov r0, r2 -;CHECK-V7-LE-NEXT: mov r1, r3 -;CHECK-V7-THUMB-BE: smlaltb r3, r2, r0, r1 +;CHECK-V7-THUMB-BE: mov r1, r3 +;CHECK-V7-THUMB-BE: smlaltb r1, r2, r0, r3 ;CHECK-V7-THUMB-BE-NEXT: mov r0, r2 -;CHECK-V7-THUMB-BE-NEXT: mov r1, r3 ;CHECK-LE-NOT: smlaltb ;CHECK-BE-NOT: smlaltb ;CHECK-V6M-THUMB-NOT: smlaltb @@ -368,19 +368,19 @@ ;CHECK-LABEL: MACLongTest16 ;CHECK-T2-DSP-NOT: {{asr|lsr}} -;CHECK-T2-DSP: smlalbt r2, r3, r1, r0 +;CHECK-T2-DSP: mov r1, r3 +;CHECK-T2-DSP: smlalbt r2, r1, r3, r0 ;CHECK-T2-DSP-NEXT: mov r0, r2 -;CHECK-T2-DSP-NEXT: mov r1, r3 ;CHECK-V5TE-NOT: {{asr|lsr}} -;CHECK-V5TE: smlalbt r2, r3, r1, r0 +;CHECK-V5TE: mov r1, r3 +;CHECK-V5TE: smlalbt r2, r1, r3, r0 ;CHECK-V5TE-NEXT: mov r0, r2 -;CHECK-V5TE-NEXT: mov r1, r3 -;CHECK-V7-LE: smlalbt r2, r3, r1, r0 +;CHECK-V7-LE: mov r1, r3 +;CHECK-V7-LE: smlalbt r2, r1, r3, r0 ;CHECK-V7-LE-NEXT: mov r0, r2 -;CHECK-V7-LE-NEXT: mov r1, r3 -;CHECK-V7-THUMB-BE: smlalbt r3, r2, r1, r0 +;CHECK-V7-THUMB-BE: mov r1, r3 +;CHECK-V7-THUMB-BE: smlalbt r1, r2, r3, r0 
;CHECK-V7-THUMB-BE-NEXT: mov r0, r2 -;CHECK-V7-THUMB-BE-NEXT: mov r1, r3 ;CHECK-LE-NOT: smlalbt ;CHECK-BE-NOT: smlalbt ;CHECK-V6M-THUMB-NOT: smlalbt Index: test/CodeGen/ARM/select_xform.ll =================================================================== --- test/CodeGen/ARM/select_xform.ll +++ test/CodeGen/ARM/select_xform.ll @@ -4,13 +4,13 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; ARM-LABEL: t1: -; ARM: suble r1, r1, #-2147483647 ; ARM: mov r0, r1 +; ARM: suble r0, r0, #-2147483647 ; T2-LABEL: t1: -; T2: mvn r0, #-2147483648 -; T2: addle r1, r0 ; T2: mov r0, r1 +; T2: mvn r1, #-2147483648 +; T2: addle r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 %tmp3 = add i32 %tmp2, %b @@ -19,12 +19,12 @@ define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; ARM-LABEL: t2: -; ARM: suble r1, r1, #10 ; ARM: mov r0, r1 +; ARM: suble r0, r0, #10 ; T2-LABEL: t2: -; T2: suble r1, #10 ; T2: mov r0, r1 +; T2: suble r0, #10 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10 %tmp3 = sub i32 %b, %tmp2 Index: test/CodeGen/ARM/ssp-data-layout.ll =================================================================== --- test/CodeGen/ARM/ssp-data-layout.ll +++ test/CodeGen/ARM/ssp-data-layout.ll @@ -450,7 +450,7 @@ ; CHECK: strb r0, [sp, #68] ; CHECK: bl end_struct_small_char ; CHECK: bl get_struct_large_char2 -; CHECK: strb r0, [sp, #106] +; CHECK: strb r0, [sp, #110] ; CHECK: bl end_struct_large_char2 %a = alloca %struct.struct_small_char, align 1 %b = alloca %struct.struct_large_char2, align 1 Index: test/CodeGen/ARM/struct_byval_arm_t1_t2.ll =================================================================== --- test/CodeGen/ARM/struct_byval_arm_t1_t2.ll +++ test/CodeGen/ARM/struct_byval_arm_t1_t2.ll @@ -122,17 +122,17 @@ ;THUMB1-LABEL: test_A_8: ;T1POST-LABEL: test_A_8: define void @test_A_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! 
+;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.A, align 8 call void @use_A(%struct.A* byval align 8 %a) @@ -144,19 +144,19 @@ ;THUMB1-LABEL: test_A_16: ;T1POST-LABEL: test_A_16: define void @test_A_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.A, align 16 call void @use_A(%struct.A* byval align 16 %a) @@ -239,21 +239,21 @@ ;THUMB1-LABEL: test_B_8: ;T1POST-LABEL: test_B_8: define void @test_B_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! 
+;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.B, align 8 call void @use_B(%struct.B* byval align 8 %a) @@ -265,21 +265,21 @@ ;THUMB1-LABEL: test_B_16: ;T1POST-LABEL: test_B_16: define void @test_B_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.B, align 16 call void @use_B(%struct.B* byval align 16 %a) @@ -363,22 +363,22 @@ ;THUMB1-LABEL: test_C_8: ;T1POST-LABEL: test_C_8: define void @test_C_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #1 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! 
entry: %a = alloca %struct.C, align 8 call void @use_C(%struct.C* byval align 8 %a) @@ -390,22 +390,22 @@ ;THUMB1-LABEL: test_C_16: ;T1POST-LABEL: test_C_16: define void @test_C_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #1 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.C, align 16 call void @use_C(%struct.C* byval align 16 %a) @@ -492,21 +492,21 @@ ;THUMB1-LABEL: test_D_8: ;T1POST-LABEL: test_D_8: define void @test_D_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.D, align 8 call void @use_D(%struct.D* byval align 8 %a) @@ -518,21 +518,21 @@ ;THUMB1-LABEL: test_D_16: ;T1POST-LABEL: test_D_16: define void @test_D_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! 
;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.D, align 16 call void @use_D(%struct.D* byval align 16 %a) @@ -627,25 +627,25 @@ ;THUMB1-LABEL: test_E_8: ;T1POST-LABEL: test_E_8: define void @test_E_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.E, align 8 call void @use_E(%struct.E* byval align 8 %a) @@ -657,25 +657,25 @@ ;THUMB1-LABEL: test_E_16: ;T1POST-LABEL: test_E_16: define void @test_E_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! 
;THUMB2: bne ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.E, align 16 call void @use_E(%struct.E* byval align 16 %a) @@ -771,18 +771,18 @@ ;THUMB1-LABEL: test_F_8: ;T1POST-LABEL: test_F_8: define void @test_F_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 @@ -790,7 +790,7 @@ ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #1 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.F, align 8 call void @use_F(%struct.F* byval align 8 %a) @@ -802,18 +802,18 @@ ;THUMB1-LABEL: test_F_16: ;T1POST-LABEL: test_F_16: define void @test_F_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne ;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! 
;THUMB2: bne ;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne ;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 @@ -821,7 +821,7 @@ ;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #1 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.F, align 16 call void @use_F(%struct.F* byval align 16 %a) @@ -896,17 +896,17 @@ ;THUMB1-LABEL: test_G_8: ;T1POST-LABEL: test_G_8: define void @test_G_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.G, align 8 call void @use_G(%struct.G* byval align 8 %a) @@ -918,17 +918,17 @@ ;THUMB1-LABEL: test_G_16: ;T1POST-LABEL: test_G_16: define void @test_G_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.G, align 16 call void @use_G(%struct.G* byval align 16 %a) @@ -1003,17 +1003,17 @@ ;THUMB1-LABEL: test_H_8: ;T1POST-LABEL: test_H_8: define void @test_H_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.H, align 8 call void @use_H(%struct.H* byval align 8 %a) @@ -1025,17 +1025,17 @@ ;THUMB1-LABEL: test_H_16: ;T1POST-LABEL: test_H_16: define void @test_H_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.H, align 16 call void @use_H(%struct.H* byval align 16 %a) @@ -1110,17 +1110,17 @@ ;THUMB1-LABEL: test_I_8: ;T1POST-LABEL: test_I_8: define void @test_I_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! 
;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.I, align 8 call void @use_I(%struct.I* byval align 8 %a) @@ -1132,17 +1132,17 @@ ;THUMB1-LABEL: test_I_16: ;T1POST-LABEL: test_I_16: define void @test_I_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.I, align 16 call void @use_I(%struct.I* byval align 16 %a) @@ -1229,21 +1229,21 @@ ;THUMB1-LABEL: test_J_8: ;T1POST-LABEL: test_J_8: define void @test_J_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.J, align 8 call void @use_J(%struct.J* byval align 8 %a) @@ -1255,21 +1255,21 @@ ;THUMB1-LABEL: test_J_16: ;T1POST-LABEL: test_J_16: define void @test_J_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! 
;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.J, align 16 call void @use_J(%struct.J* byval align 16 %a) @@ -1356,21 +1356,21 @@ ;THUMB1-LABEL: test_K_8: ;T1POST-LABEL: test_K_8: define void @test_K_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.K, align 8 call void @use_K(%struct.K* byval align 8 %a) @@ -1382,21 +1382,21 @@ ;THUMB1-LABEL: test_K_16: ;T1POST-LABEL: test_K_16: define void @test_K_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! 
;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.K, align 16 call void @use_K(%struct.K* byval align 16 %a) @@ -1483,21 +1483,21 @@ ;THUMB1-LABEL: test_L_8: ;T1POST-LABEL: test_L_8: define void @test_L_8() { -;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]! entry: %a = alloca %struct.L, align 8 call void @use_L(%struct.L* byval align 8 %a) @@ -1509,21 +1509,21 @@ ;THUMB1-LABEL: test_L_16: ;T1POST-LABEL: test_L_16: define void @test_L_16() { -;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;ARM: bne -;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB2: bne ;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 ;NO_NEON: bne -;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! ;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} ;THUMB1: adds [[BASE]], #4 ;THUMB1: bne -;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]! 
entry: %a = alloca %struct.L, align 16 call void @use_L(%struct.L* byval align 16 %a) Index: test/CodeGen/ARM/swifterror.ll =================================================================== --- test/CodeGen/ARM/swifterror.ll +++ test/CodeGen/ARM/swifterror.ll @@ -39,11 +39,11 @@ ; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], r0 ; CHECK-APPLE-DAG: mov r8, #0 ; CHECK-APPLE: bl {{.*}}foo -; CHECK-APPLE: cmp r8, #0 +; CHECK-APPLE: mov r0, r8 +; CHECK-APPLE: cmp r0, #0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8] +; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8] ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov r0, r8 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller: @@ -138,7 +138,7 @@ ; CHECK-APPLE: eq ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc -; CHECK-APPLE: mov [[ID:r[0-9]+]], #1 +; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], #1 ; CHECK-APPLE-DAG: mov r8, r{{.*}} ; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8] @@ -177,14 +177,13 @@ ; CHECK-APPLE-LABEL: foo_loop: ; CHECK-APPLE: mov [[CODE:r[0-9]+]], r0 ; swifterror is kept in a register -; CHECK-APPLE: mov [[ID:r[0-9]+]], r8 ; CHECK-APPLE: cmp [[CODE]], #0 ; CHECK-APPLE: beq ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc -; CHECK-APPLE: strb r{{.*}}, [{{.*}}[[ID]], #8] +; CHECK-APPLE: mov r8, r0 +; CHECK-APPLE: strb r{{.*}}, [r8, #8] ; CHECK-APPLE: ble -; CHECK-APPLE: mov r8, [[ID]] ; CHECK-O0-LABEL: foo_loop: ; CHECK-O0: mov r{{.*}}, r8 @@ -266,11 +265,11 @@ ; CHECK-APPLE: mov [[ID:r[0-9]+]], r0 ; CHECK-APPLE: mov r8, #0 ; CHECK-APPLE: bl {{.*}}foo_sret -; CHECK-APPLE: cmp r8, #0 +; CHECK-APPLE: mov r0, r8 +; CHECK-APPLE: cmp r0, #0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8] +; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8] ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov r0, r8 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller3: @@ -314,10 +313,9 @@ 
; CHECK-APPLE-LABEL: foo_vararg: ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc -; CHECK-APPLE: mov [[REG:r[0-9]+]], r0 +; CHECK-APPLE: mov r8, r0 ; CHECK-APPLE: mov [[ID:r[0-9]+]], #1 -; CHECK-APPLE-DAG: strb [[ID]], [{{.*}}[[REG]], #8] -; CHECK-APPLE-DAG: mov r8, [[REG]] +; CHECK-APPLE-DAG: strb [[ID]], [r8, #8] entry: %call = call i8* @malloc(i64 16) @@ -348,11 +346,11 @@ ; CHECK-APPLE: mov [[ID:r[0-9]+]], r0 ; CHECK-APPLE: mov r8, #0 ; CHECK-APPLE: bl {{.*}}foo_vararg -; CHECK-APPLE: cmp r8, #0 +; CHECK-APPLE: mov r0, r8 +; CHECK-APPLE: cmp r0, #0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8] +; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8] ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov r0, r8 ; CHECK-APPLE: bl {{.*}}free entry: %error_ptr_ref = alloca swifterror %swift_error* Index: test/CodeGen/BPF/alu8.ll =================================================================== --- test/CodeGen/BPF/alu8.ll +++ test/CodeGen/BPF/alu8.ll @@ -9,22 +9,22 @@ define i8 @add(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: add: -; CHECK: r1 += r2 # encoding: [0x0f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] -; CHECK: r0 = r1 # encoding: [0xbf,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 = r1 # encoding: [0xbf,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 += r2 # encoding: [0x0f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %1 = add i8 %a, %b ret i8 %1 } define i8 @and(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: and: -; CHECK: r1 &= r2 # encoding: [0x5f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 &= r2 # encoding: [0x5f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %1 = and i8 %a, %b ret i8 %1 } define i8 @bis(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: bis: -; CHECK: r1 |= r2 # encoding: [0x4f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 |= r2 # encoding: [0x4f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %1 = or i8 %a, %b ret i8 %1 } @@ -39,7 +39,7 @@ define i8 @xor(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: xor: -; CHECK: r1 
^= r2 # encoding: [0xaf,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 ^= r2 # encoding: [0xaf,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %1 = xor i8 %a, %b ret i8 %1 } Index: test/CodeGen/BPF/basictest.ll =================================================================== --- test/CodeGen/BPF/basictest.ll +++ test/CodeGen/BPF/basictest.ll @@ -4,7 +4,7 @@ %tmp.1 = add i32 %X, 1 ret i32 %tmp.1 ; CHECK-LABEL: test0: -; CHECK: r1 += 1 +; CHECK: r0 += 1 } ; CHECK-LABEL: store_imm: Index: test/CodeGen/BPF/cmp.ll =================================================================== --- test/CodeGen/BPF/cmp.ll +++ test/CodeGen/BPF/cmp.ll @@ -17,7 +17,7 @@ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ] ret i8 %.0 ; CHECK-LABEL:foo_cmp1: -; CHECK: if r2 s>= r1 +; CHECK: if r0 s>= r1 } ; Function Attrs: nounwind readnone uwtable @@ -37,7 +37,7 @@ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ] ret i8 %.0 ; CHECK-LABEL:foo_cmp2: -; CHECK: if r2 s> r1 +; CHECK: if r0 s> r1 } ; Function Attrs: nounwind readnone uwtable @@ -57,7 +57,7 @@ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ] ret i8 %.0 ; CHECK-LABEL:foo_cmp3: -; CHECK: if r1 s>= r2 +; CHECK: if r1 s>= r0 } ; Function Attrs: nounwind readnone uwtable @@ -77,7 +77,7 @@ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ] ret i8 %.0 ; CHECK-LABEL:foo_cmp4: -; CHECK: if r1 s> r2 +; CHECK: if r1 s> r0 } ; Function Attrs: nounwind readnone uwtable @@ -86,9 +86,9 @@ %a.b = select i1 %1, i8 %a, i8 %b ret i8 %a.b ; CHECK-LABEL:min: -; CHECK: if r2 s> r1 -; CHECK: r1 = r2 ; CHECK: r0 = r1 +; CHECK: if r2 s> r0 +; CHECK: r0 = r2 } ; Function Attrs: nounwind readnone uwtable @@ -97,7 +97,7 @@ %a.b = select i1 %1, i8 %a, i8 %b ret i8 %a.b ; CHECK-LABEL:minu: -; CHECK: if r3 > r1 +; CHECK: if r1 > r0 } ; Function Attrs: nounwind readnone uwtable @@ -106,7 +106,7 @@ %a.b = select i1 %1, i8 %a, i8 %b ret i8 %a.b ; CHECK-LABEL:max: -; CHECK: if r1 s> r2 +; CHECK: if r0 s> r2 } ; Function Attrs: nounwind readnone uwtable Index: test/CodeGen/BPF/dwarfdump.ll 
=================================================================== --- test/CodeGen/BPF/dwarfdump.ll +++ test/CodeGen/BPF/dwarfdump.ll @@ -61,4 +61,4 @@ ; CHECK: file_names[ 1] 0 0x00000000 0x00000000 testprog.c ; CHECK: 0x0000000000000000 2 -; CHECK: 0x0000000000000020 7 +; CHECK: 0x0000000000000028 7 Index: test/CodeGen/BPF/intrinsics.ll =================================================================== --- test/CodeGen/BPF/intrinsics.ll +++ test/CodeGen/BPF/intrinsics.ll @@ -83,15 +83,15 @@ ret i32 %conv5 ; CHECK-LABEL: bswap: ; CHECK-EL: r1 = be64 r1 # encoding: [0xdc,0x01,0x00,0x00,0x40,0x00,0x00,0x00] -; CHECK-EL: r2 = be32 r2 # encoding: [0xdc,0x02,0x00,0x00,0x20,0x00,0x00,0x00] -; CHECK-EL: r2 += r1 # encoding: [0x0f,0x12,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK-EL: r0 = be32 r0 # encoding: [0xdc,0x00,0x00,0x00,0x20,0x00,0x00,0x00] +; CHECK-EL: r0 += r1 # encoding: [0x0f,0x10,0x00,0x00,0x00,0x00,0x00,0x00] ; CHECK-EL: r3 = be16 r3 # encoding: [0xdc,0x03,0x00,0x00,0x10,0x00,0x00,0x00] -; CHECK-EL: r2 += r3 # encoding: [0x0f,0x32,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK-EL: r0 += r3 # encoding: [0x0f,0x30,0x00,0x00,0x00,0x00,0x00,0x00] ; CHECK-EB: r1 = le64 r1 # encoding: [0xd4,0x10,0x00,0x00,0x00,0x00,0x00,0x40] -; CHECK-EB: r2 = le32 r2 # encoding: [0xd4,0x20,0x00,0x00,0x00,0x00,0x00,0x20] -; CHECK-EB: r2 += r1 # encoding: [0x0f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK-EB: r0 = le32 r0 # encoding: [0xd4,0x00,0x00,0x00,0x00,0x00,0x00,0x20] +; CHECK-EB: r0 += r1 # encoding: [0x0f,0x01,0x00,0x00,0x00,0x00,0x00,0x00] ; CHECK-EB: r3 = le16 r3 # encoding: [0xd4,0x30,0x00,0x00,0x00,0x00,0x00,0x10] -; CHECK-EB: r2 += r3 # encoding: [0x0f,0x23,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK-EB: r0 += r3 # encoding: [0x0f,0x03,0x00,0x00,0x00,0x00,0x00,0x00] } declare i64 @llvm.bswap.i64(i64) #1 Index: test/CodeGen/BPF/objdump_intrinsics.ll =================================================================== --- test/CodeGen/BPF/objdump_intrinsics.ll +++ 
test/CodeGen/BPF/objdump_intrinsics.ll @@ -83,15 +83,15 @@ ret i32 %conv5 ; CHECK-LABEL: bswap: ; CHECK-EL: r1 = be64 r1 -; CHECK-EL: r2 = be32 r2 -; CHECK-EL: r2 += r1 +; CHECK-EL: r0 = be32 r0 +; CHECK-EL: r0 += r1 ; CHECK-EL: r3 = be16 r3 -; CHECK-EL: r2 += r3 +; CHECK-EL: r0 += r3 ; CHECK-EB: r1 = le64 r1 -; CHECK-EB: r2 = le32 r2 -; CHECK-EB: r2 += r1 +; CHECK-EB: r0 = le32 r0 +; CHECK-EB: r0 += r1 ; CHECK-EB: r3 = le16 r3 -; CHECK-EB: r2 += r3 +; CHECK-EB: r0 += r3 } declare i64 @llvm.bswap.i64(i64) #1 Index: test/CodeGen/BPF/sanity.ll =================================================================== --- test/CodeGen/BPF/sanity.ll +++ test/CodeGen/BPF/sanity.ll @@ -7,7 +7,7 @@ %1 = add nsw i32 %b, %a ret i32 %1 ; CHECK-LABEL: foo_int: -; CHECK: r2 += r1 +; CHECK: r0 += r1 } ; Function Attrs: nounwind readnone uwtable @@ -15,9 +15,9 @@ %1 = add i8 %b, %a ret i8 %1 ; CHECK-LABEL: foo_char: -; CHECK: r2 += r1 -; CHECK: r2 <<= 56 -; CHECK: r2 s>>= 56 +; CHECK: r0 += r1 +; CHECK: r0 <<= 56 +; CHECK: r0 s>>= 56 } ; Function Attrs: nounwind readnone uwtable @@ -26,9 +26,9 @@ %2 = sub i64 %1, %c ret i64 %2 ; CHECK-LABEL: foo_ll: -; CHECK: r2 += r1 -; CHECK: r2 -= r3 ; CHECK: r0 = r2 +; CHECK: r0 += r1 +; CHECK: r0 -= r3 } ; Function Attrs: nounwind uwtable @@ -60,7 +60,7 @@ %a.b = select i1 %1, i8 %a, i8 %b ret i8 %a.b ; CHECK-LABEL: foo_cmp: -; CHECK: if r2 s> r1 +; CHECK: if r2 s> r0 } ; Function Attrs: nounwind readnone uwtable @@ -82,7 +82,7 @@ %.0 = phi i32 [ %4, %2 ], [ %7, %5 ] ret i32 %.0 ; CHECK-LABEL: foo_muldiv: -; CHECK: r2 *= r3 +; CHECK: r0 *= r3 } ; Function Attrs: nounwind uwtable Index: test/CodeGen/BPF/shifts.ll =================================================================== --- test/CodeGen/BPF/shifts.ll +++ test/CodeGen/BPF/shifts.ll @@ -3,7 +3,7 @@ define zeroext i8 @lshr8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: lshr8: -; CHECK: r1 >>= r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 
>>= r2 # encoding: [0x7f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = lshr i8 %a, %cnt ret i8 %shr } @@ -11,7 +11,7 @@ define signext i8 @ashr8(i8 signext %a, i8 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: ashr8: -; CHECK: r1 s>>= r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 s>>= r2 # encoding: [0xcf,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = ashr i8 %a, %cnt ret i8 %shr } @@ -19,7 +19,7 @@ define zeroext i8 @shl8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone { entry: ; CHECK: shl8 -; CHECK: r1 <<= r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 <<= r2 # encoding: [0x6f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shl = shl i8 %a, %cnt ret i8 %shl } @@ -27,7 +27,7 @@ define zeroext i16 @lshr16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: lshr16: -; CHECK: r1 >>= r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 >>= r2 # encoding: [0x7f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = lshr i16 %a, %cnt ret i16 %shr } @@ -35,7 +35,7 @@ define signext i16 @ashr16(i16 signext %a, i16 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: ashr16: -; CHECK: r1 s>>= r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 s>>= r2 # encoding: [0xcf,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = ashr i16 %a, %cnt ret i16 %shr } @@ -43,7 +43,7 @@ define zeroext i16 @shl16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: shl16: -; CHECK: r1 <<= r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 <<= r2 # encoding: [0x6f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shl = shl i16 %a, %cnt ret i16 %shl } @@ -51,8 +51,8 @@ define zeroext i32 @lshr32(i32 zeroext %a, i32 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: lshr32: -; CHECK: r1 >>= r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] -; CHECK: r1 <<= 32 # encoding: [0x67,0x01,0x00,0x00,0x20,0x00,0x00,0x00] +; CHECK: r0 >>= r2 # encoding: 
[0x7f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 <<= 32 # encoding: [0x67,0x00,0x00,0x00,0x20,0x00,0x00,0x00] %shr = lshr i32 %a, %cnt ret i32 %shr } @@ -60,7 +60,7 @@ define signext i32 @ashr32(i32 signext %a, i32 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: ashr32: -; CHECK: r1 s>>= r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 s>>= r2 # encoding: [0xcf,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = ashr i32 %a, %cnt ret i32 %shr } @@ -68,7 +68,7 @@ define zeroext i32 @shl32(i32 zeroext %a, i32 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: shl32: -; CHECK: r1 <<= r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 <<= r2 # encoding: [0x6f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shl = shl i32 %a, %cnt ret i32 %shl } @@ -76,7 +76,7 @@ define zeroext i64 @lshr64(i64 zeroext %a, i64 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: lshr64: -; CHECK: r1 >>= r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 >>= r2 # encoding: [0x7f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = lshr i64 %a, %cnt ret i64 %shr } @@ -84,7 +84,7 @@ define signext i64 @ashr64(i64 signext %a, i64 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: ashr64: -; CHECK: r1 s>>= r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 s>>= r2 # encoding: [0xcf,0x20,0x00,0x00,0x00,0x00,0x00,0x00] %shr = ashr i64 %a, %cnt ret i64 %shr } @@ -92,8 +92,8 @@ define zeroext i64 @shl64(i64 zeroext %a, i64 zeroext %cnt) nounwind readnone { entry: ; CHECK-LABEL: shl64: -; CHECK: r1 <<= r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00] ; CHECK: r0 = r1 # encoding: [0xbf,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK: r0 <<= r2 # encoding: [0x6f,0x20,0x00,0x00,0x00,0x00,0x00,0x00] ; CHECK: exit # encoding: [0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00] %shl = shl i64 %a, %cnt ret i64 %shl Index: test/CodeGen/Hexagon/mul64-sext.ll =================================================================== --- 
test/CodeGen/Hexagon/mul64-sext.ll +++ test/CodeGen/Hexagon/mul64-sext.ll @@ -75,9 +75,9 @@ } ; CHECK-LABEL: mul_nac_2 -; CHECK: r0 = memw(r0+#0) -; CHECK: r5:4 -= mpy(r2,r0) ; CHECK: r1:0 = combine(r5,r4) +; CHECK: r6 = memw(r0+#0) +; CHECK: r1:0 -= mpy(r2,r6) ; CHECK: jumpr r31 define i64 @mul_nac_2(i32* %a0, i64 %a1, i64 %a2) #0 { b3: Index: test/CodeGen/Hexagon/pred-absolute-store.ll =================================================================== --- test/CodeGen/Hexagon/pred-absolute-store.ll +++ test/CodeGen/Hexagon/pred-absolute-store.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that we are able to predicate instructions with absolute ; addressing mode. -; CHECK: if ({{!?}}p{{[0-3]}}) memw(##gvar) = r{{[0-9]+}} +; CHECK: if ({{!?}}p{{[0-3]}}.new) memw(##gvar) = r{{[0-9]+}} @gvar = external global i32 define i32 @test2(i32 %a, i32 %b) nounwind { Index: test/CodeGen/Mips/Fast-ISel/sel1.ll =================================================================== --- test/CodeGen/Mips/Fast-ISel/sel1.ll +++ test/CodeGen/Mips/Fast-ISel/sel1.ll @@ -84,11 +84,11 @@ entry: ; CHECK-LABEL: sel_float2: + ; CHECK: mov.s $f0, $f14 ; CHECK-DAG: xor $[[T0:[0-9]+]], $6, $zero ; CHECK: sltu $[[T1:[0-9]+]], $zero, $[[T0]] ; CHECK-NEXT: andi $[[T2:[0-9]+]], $[[T1]], 1 - ; CHECK: movn.s $f14, $f12, $[[T2]] - ; CHECK: mov.s $f0, $f14 + ; CHECK: movn.s $f0, $f12, $[[T2]] %cond = icmp ne i32 %j, 0 %res = select i1 %cond, float %k, float %l ret float %res @@ -114,12 +114,12 @@ entry: ; CHECK-LABEL: sel_double2: + ; CHECK: mov.d $f0, $f14 ; CHECK-DAG: lw $[[SEL:[0-9]+]], 16($sp) ; CHECK-DAG: xor $[[T0:[0-9]+]], $[[SEL]], $zero ; CHECK: sltu $[[T1:[0-9]+]], $zero, $[[T0]] ; CHECK-NEXT: andi $[[T2:[0-9]+]], $[[T1]], 1 - ; CHECK: movn.d $f14, $f12, $[[T2]] - ; CHECK: mov.d $f0, $f14 + ; CHECK: movn.d $f0, $f12, $[[T2]] %cond = icmp ne i32 %j, 0 %res = select i1 %cond, double %k, double %l ret double %res Index: test/CodeGen/Mips/analyzebranch.ll 
=================================================================== --- test/CodeGen/Mips/analyzebranch.ll +++ test/CodeGen/Mips/analyzebranch.ll @@ -16,7 +16,7 @@ ; 32-GPR: mtc1 $zero, $[[Z:f[0-9]]] ; 32-GPR: mthc1 $zero, $[[Z:f[0-9]]] ; 64-GPR: dmtc1 $zero, $[[Z:f[0-9]]] -; GPR: cmp.lt.d $[[FGRCC:f[0-9]+]], $[[Z]], $f12 +; GPR: cmp.lt.d $[[FGRCC:f[0-9]+]], $[[Z]], $f0 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] ; GPR: bnezc $[[GPRCC]], {{\$|\.L}}BB Index: test/CodeGen/Mips/llvm-ir/select-dbl.ll =================================================================== --- test/CodeGen/Mips/llvm-ir/select-dbl.ll +++ test/CodeGen/Mips/llvm-ir/select-dbl.ll @@ -59,15 +59,15 @@ ; M3: andi $[[T0:[0-9]+]], $4, 1 ; M3: bnez $[[T0]], [[BB0:.LBB[0-9_]+]] - ; M3: nop - ; M3: mov.d $f13, $f14 + ; M3: mov.d $f0, $f13 + ; M3: mov.d $f0, $f14 ; M3: [[BB0]]: ; M3: jr $ra - ; M3: mov.d $f0, $f13 + ; M3: nop - ; CMOV-64: andi $[[T0:[0-9]+]], $4, 1 - ; CMOV-64: movn.d $f14, $f13, $[[T0]] ; CMOV-64: mov.d $f0, $f14 + ; CMOV-64: andi $[[T0:[0-9]+]], $4, 1 + ; CMOV-64: movn.d $f0, $f13, $[[T0]] ; SEL-64: mtc1 $4, $f0 ; SEL-64: sel.d $f0, $f14, $f13 @@ -90,16 +90,16 @@ ; M2: lw $[[T0:[0-9]+]], 16($sp) ; M2: andi $[[T1:[0-9]+]], $[[T0]], 1 ; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]] - ; M2: nop - ; M2: mov.d $f12, $f14 + ; M2: mov.d $f0, $f12 + ; M2: mov.d $f0, $f14 ; M2: $[[BB0]]: ; M2: jr $ra - ; M2: mov.d $f0, $f12 + ; M2: nop + ; CMOV-32: mov.d $f0, $f14 ; CMOV-32: lw $[[T0:[0-9]+]], 16($sp) ; CMOV-32: andi $[[T1:[0-9]+]], $[[T0]], 1 - ; CMOV-32: movn.d $f14, $f12, $[[T1]] - ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: movn.d $f0, $f12, $[[T1]] ; SEL-32: lw $[[T0:[0-9]+]], 16($sp) ; SEL-32: mtc1 $[[T0]], $f0 @@ -107,23 +107,23 @@ ; M3: andi $[[T0:[0-9]+]], $6, 1 ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] - ; M3: nop - ; M3: mov.d $f12, $f13 + ; M3: mov.d $f0, $f12 + ; M3: mov.d $f0, $f13 ; M3: [[BB0]]: ; M3: jr $ra - ; M3: mov.d $f0, $f12 + ; M3: nop - ; CMOV-64: 
andi $[[T0:[0-9]+]], $6, 1 - ; CMOV-64: movn.d $f13, $f12, $[[T0]] ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: andi $[[T0:[0-9]+]], $6, 1 + ; CMOV-64: movn.d $f0, $f12, $[[T0]] ; SEL-64: mtc1 $6, $f0 ; SEL-64: sel.d $f0, $f13, $f12 + ; MM32R3: mov.d $f0, $f14 ; MM32R3: lw $[[T0:[0-9]+]], 16($sp) ; MM32R3: andi16 $[[T1:[0-9]+]], $[[T0:[0-9]+]], 1 - ; MM32R3: movn.d $f14, $f12, $[[T1]] - ; MM32R3: mov.d $f0, $f14 + ; MM32R3: movn.d $f0, $f12, $[[T1]] %r = select i1 %s, double %x, double %y ret double %r @@ -133,34 +133,34 @@ entry: ; ALL-LABEL: tst_select_fcmp_olt_double: - ; M2: c.olt.d $f12, $f14 - ; M3: c.olt.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.olt.d $f0, $f14 + ; M3: c.olt.d $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 - ; CMOV-32: c.olt.d $f12, $f14 - ; CMOV-32: movt.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.olt.d $f12, $f0 + ; CMOV-32: movt.d $f0, $f12, $fcc0 ; SEL-32: cmp.lt.d $f0, $f12, $f14 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.olt.d $f12, $f13 - ; CMOV-64: movt.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.olt.d $f12, $f0 + ; CMOV-64: movt.d $f0, $f12, $fcc0 ; SEL-64: cmp.lt.d $f0, $f12, $f13 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.olt.d $f12, $f14 - ; MM32R3: movt.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.olt.d $f12, $f0 + ; MM32R3: movt.d $f0, $f12, $fcc0 %s = fcmp olt double %x, %y %r = select i1 %s, double %x, double %y @@ -171,34 +171,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_ole_double: - ; M2: c.ole.d $f12, $f14 - ; M3: c.ole.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.ole.d $f0, $f14 + ; M3: c.ole.d $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d 
$f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ole.d $f12, $f14 - ; CMOV-32: movt.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.ole.d $f12, $f0 + ; CMOV-32: movt.d $f0, $f12, $fcc0 ; SEL-32: cmp.le.d $f0, $f12, $f14 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.ole.d $f12, $f13 - ; CMOV-64: movt.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.ole.d $f12, $f0 + ; CMOV-64: movt.d $f0, $f12, $fcc0 ; SEL-64: cmp.le.d $f0, $f12, $f13 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.ole.d $f12, $f14 - ; MM32R3: movt.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.ole.d $f12, $f0 + ; MM32R3: movt.d $f0, $f12, $fcc0 %s = fcmp ole double %x, %y %r = select i1 %s, double %x, double %y @@ -209,34 +210,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_ogt_double: - ; M2: c.ule.d $f12, $f14 - ; M3: c.ule.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.ule.d $f0, $f14 + ; M3: c.ule.d $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ule.d $f12, $f14 - ; CMOV-32: movf.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.ule.d $f12, $f0 + ; CMOV-32: movf.d $f0, $f12, $fcc0 ; SEL-32: cmp.lt.d $f0, $f14, $f12 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.ule.d $f12, $f13 - ; CMOV-64: movf.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.ule.d $f12, $f0 + ; CMOV-64: movf.d $f0, $f12, $fcc0 ; SEL-64: cmp.lt.d $f0, $f13, $f12 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.ule.d $f12, $f14 - ; MM32R3: movf.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.ule.d $f12, $f0 + ; MM32R3: movf.d $f0, $f12, $fcc0 %s = fcmp ogt double %x, %y %r = select i1 %s, double %x, double %y @@ -247,34 +249,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_oge_double: - ; 
M2: c.ult.d $f12, $f14 - ; M3: c.ult.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.ult.d $f0, $f14 + ; M3: c.ult.d $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ult.d $f12, $f14 - ; CMOV-32: movf.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.ult.d $f12, $f0 + ; CMOV-32: movf.d $f0, $f12, $fcc0 ; SEL-32: cmp.le.d $f0, $f14, $f12 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.ult.d $f12, $f13 - ; CMOV-64: movf.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.ult.d $f12, $f0 + ; CMOV-64: movf.d $f0, $f12, $fcc0 ; SEL-64: cmp.le.d $f0, $f13, $f12 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.ult.d $f12, $f14 - ; MM32R3: movf.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.ult.d $f12, $f0 + ; MM32R3: movf.d $f0, $f12, $fcc0 %s = fcmp oge double %x, %y %r = select i1 %s, double %x, double %y @@ -285,34 +288,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_oeq_double: - ; M2: c.eq.d $f12, $f14 - ; M3: c.eq.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.eq.d $f0, $f14 + ; M3: c.eq.d $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.eq.d $f12, $f14 - ; CMOV-32: movt.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.eq.d $f12, $f0 + ; CMOV-32: movt.d $f0, $f12, $fcc0 ; SEL-32: cmp.eq.d $f0, $f12, $f14 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.eq.d $f12, $f13 - ; CMOV-64: movt.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.eq.d $f12, $f0 + ; CMOV-64: movt.d $f0, $f12, $fcc0 ; SEL-64: cmp.eq.d $f0, $f12, $f13 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.eq.d 
$f12, $f14 - ; MM32R3: movt.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.eq.d $f12, $f0 + ; MM32R3: movt.d $f0, $f12, $fcc0 %s = fcmp oeq double %x, %y %r = select i1 %s, double %x, double %y @@ -323,20 +327,21 @@ entry: ; ALL-LABEL: tst_select_fcmp_one_double: - ; M2: c.ueq.d $f12, $f14 - ; M3: c.ueq.d $f12, $f13 + ; M2-M3: mov.d $f0, $f12 + ; M2: c.ueq.d $f0, $f14 + ; M3: c.ueq.d $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.d $f12, $f14 - ; M3: mov.d $f12, $f13 + ; M2: mov.d $f0, $f14 + ; M3: mov.d $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.d $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ueq.d $f12, $f14 - ; CMOV-32: movf.d $f14, $f12, $fcc0 ; CMOV-32: mov.d $f0, $f14 + ; CMOV-32: c.ueq.d $f12, $f0 + ; CMOV-32: movf.d $f0, $f12, $fcc0 ; SEL-32: cmp.ueq.d $f0, $f12, $f14 ; SEL-32: mfc1 $[[T0:[0-9]+]], $f0 @@ -344,9 +349,9 @@ ; SEL-32: mtc1 $[[T0:[0-9]+]], $f0 ; SEL-32: sel.d $f0, $f14, $f12 - ; CMOV-64: c.ueq.d $f12, $f13 - ; CMOV-64: movf.d $f13, $f12, $fcc0 ; CMOV-64: mov.d $f0, $f13 + ; CMOV-64: c.ueq.d $f12, $f0 + ; CMOV-64: movf.d $f0, $f12, $fcc0 ; SEL-64: cmp.ueq.d $f0, $f12, $f13 ; SEL-64: mfc1 $[[T0:[0-9]+]], $f0 @@ -354,9 +359,9 @@ ; SEL-64: mtc1 $[[T0:[0-9]+]], $f0 ; SEL-64: sel.d $f0, $f13, $f12 - ; MM32R3: c.ueq.d $f12, $f14 - ; MM32R3: movf.d $f14, $f12, $fcc0 ; MM32R3: mov.d $f0, $f14 + ; MM32R3: c.ueq.d $f12, $f0 + ; MM32R3: movf.d $f0, $f12, $fcc0 %s = fcmp one double %x, %y %r = select i1 %s, double %x, double %y Index: test/CodeGen/Mips/llvm-ir/select-flt.ll =================================================================== --- test/CodeGen/Mips/llvm-ir/select-flt.ll +++ test/CodeGen/Mips/llvm-ir/select-flt.ll @@ -36,14 +36,14 @@ ; M2-M3: andi $[[T0:[0-9]+]], $4, 1 ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]] ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] - ; M2-M3: nop + ; M3: mov.s $f0, $f13 + ; M3: mov.s $f0, $f14 + ; M2: nop ; M2: jr $ra ; M2: mtc1 $6, $f0 - ; M3: mov.s 
$f13, $f14 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra ; M2: mtc1 $5, $f0 - ; M3: mov.s $f0, $f13 ; CMOV-32: mtc1 $6, $f0 ; CMOV-32: andi $[[T0:[0-9]+]], $4, 1 @@ -55,9 +55,9 @@ ; SEL-32: mtc1 $4, $f0 ; SEL-32: sel.s $f0, $[[F1]], $[[F0]] - ; CMOV-64: andi $[[T0:[0-9]+]], $4, 1 - ; CMOV-64: movn.s $f14, $f13, $[[T0]] ; CMOV-64: mov.s $f0, $f14 + ; CMOV-64: andi $[[T0:[0-9]+]], $4, 1 + ; CMOV-64: movn.s $f0, $f13, $[[T0]] ; SEL-64: mtc1 $4, $f0 ; SEL-64: sel.s $f0, $f14, $f13 @@ -79,30 +79,30 @@ ; M2-M3: andi $[[T0:[0-9]+]], $6, 1 ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]] ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]] - ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: andi $[[T0:[0-9]+]], $6, 1 - ; CMOV-32: movn.s $f14, $f12, $[[T0]] ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: andi $[[T0:[0-9]+]], $6, 1 + ; CMOV-32: movn.s $f0, $f12, $[[T0]] ; SEL-32: mtc1 $6, $f0 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: andi $[[T0:[0-9]+]], $6, 1 - ; CMOV-64: movn.s $f13, $f12, $[[T0]] ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: andi $[[T0:[0-9]+]], $6, 1 + ; CMOV-64: movn.s $f0, $f12, $[[T0]] ; SEL-64: mtc1 $6, $f0 ; SEL-64: sel.s $f0, $f13, $f12 + ; MM32R3: mov.s $[[F0:f[0-9]+]], $f14 ; MM32R3: andi16 $[[T0:[0-9]+]], $6, 1 - ; MM32R3: movn.s $[[F0:f[0-9]+]], $f12, $[[T0]] - ; MM32R3: mov.s $f0, $[[F0]] + ; MM32R3: movn.s $[[F0]], $f12, $[[T0]] %r = select i1 %s, float %x, float %y ret float %r @@ -112,34 +112,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_olt_float: - ; M2: c.olt.s $f12, $f14 - ; M3: c.olt.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: c.olt.s $f0, $f14 + ; M3: c.olt.s $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; 
M2-M3: nop - ; CMOV-32: c.olt.s $f12, $f14 - ; CMOV-32: movt.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.olt.s $f12, $f0 + ; CMOV-32: movt.s $f0, $f12, $fcc0 ; SEL-32: cmp.lt.s $f0, $f12, $f14 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.olt.s $f12, $f13 - ; CMOV-64: movt.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.olt.s $f12, $f0 + ; CMOV-64: movt.s $f0, $f12, $fcc0 ; SEL-64: cmp.lt.s $f0, $f12, $f13 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.olt.s $f12, $f14 - ; MM32R3: movt.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.olt.s $f12, $f0 + ; MM32R3: movt.s $f0, $f12, $fcc0 %s = fcmp olt float %x, %y %r = select i1 %s, float %x, float %y @@ -150,34 +151,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_ole_float: - ; M2: c.ole.s $f12, $f14 - ; M3: c.ole.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: c.ole.s $f0, $f14 + ; M3: c.ole.s $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ole.s $f12, $f14 - ; CMOV-32: movt.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.ole.s $f12, $f0 + ; CMOV-32: movt.s $f0, $f12, $fcc0 ; SEL-32: cmp.le.s $f0, $f12, $f14 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.ole.s $f12, $f13 - ; CMOV-64: movt.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.ole.s $f12, $f0 + ; CMOV-64: movt.s $f0, $f12, $fcc0 ; SEL-64: cmp.le.s $f0, $f12, $f13 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.ole.s $f12, $f14 - ; MM32R3: movt.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.ole.s $f12, $f0 + ; MM32R3: movt.s $f0, $f12, $fcc0 %s = fcmp ole float %x, %y %r = select i1 %s, float %x, float %y @@ -188,34 +190,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_ogt_float: - ; M2: c.ule.s $f12, $f14 - ; M3: c.ule.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: 
c.ule.s $f0, $f14 + ; M3: c.ule.s $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ule.s $f12, $f14 - ; CMOV-32: movf.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.ule.s $f12, $f0 + ; CMOV-32: movf.s $f0, $f12, $fcc0 ; SEL-32: cmp.lt.s $f0, $f14, $f12 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.ule.s $f12, $f13 - ; CMOV-64: movf.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.ule.s $f12, $f0 + ; CMOV-64: movf.s $f0, $f12, $fcc0 ; SEL-64: cmp.lt.s $f0, $f13, $f12 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.ule.s $f12, $f14 - ; MM32R3: movf.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.ule.s $f12, $f0 + ; MM32R3: movf.s $f0, $f12, $fcc0 %s = fcmp ogt float %x, %y %r = select i1 %s, float %x, float %y @@ -226,34 +229,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_oge_float: - ; M2: c.ult.s $f12, $f14 - ; M3: c.ult.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: c.ult.s $f0, $f14 + ; M3: c.ult.s $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ult.s $f12, $f14 - ; CMOV-32: movf.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.ult.s $f12, $f0 + ; CMOV-32: movf.s $f0, $f12, $fcc0 ; SEL-32: cmp.le.s $f0, $f14, $f12 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.ult.s $f12, $f13 - ; CMOV-64: movf.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.ult.s $f12, $f0 + ; CMOV-64: movf.s $f0, $f12, $fcc0 ; SEL-64: cmp.le.s $f0, $f13, $f12 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.ult.s $f12, $f14 - ; MM32R3: movf.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; 
MM32R3: c.ult.s $f12, $f0 + ; MM32R3: movf.s $f0, $f12, $fcc0 %s = fcmp oge float %x, %y %r = select i1 %s, float %x, float %y @@ -264,34 +268,35 @@ entry: ; ALL-LABEL: tst_select_fcmp_oeq_float: - ; M2: c.eq.s $f12, $f14 - ; M3: c.eq.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: c.eq.s $f0, $f14 + ; M3: c.eq.s $f0, $f13 ; M2: bc1t [[BB0:\$BB[0-9_]+]] ; M3: bc1t [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.eq.s $f12, $f14 - ; CMOV-32: movt.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.eq.s $f12, $f0 + ; CMOV-32: movt.s $f0, $f12, $fcc0 ; SEL-32: cmp.eq.s $f0, $f12, $f14 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.eq.s $f12, $f13 - ; CMOV-64: movt.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.eq.s $f12, $f0 + ; CMOV-64: movt.s $f0, $f12, $fcc0 ; SEL-64: cmp.eq.s $f0, $f12, $f13 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.eq.s $f12, $f14 - ; MM32R3: movt.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.eq.s $f12, $f0 + ; MM32R3: movt.s $f0, $f12, $fcc0 %s = fcmp oeq float %x, %y %r = select i1 %s, float %x, float %y @@ -302,20 +307,21 @@ entry: ; ALL-LABEL: tst_select_fcmp_one_float: - ; M2: c.ueq.s $f12, $f14 - ; M3: c.ueq.s $f12, $f13 + ; M2-M3: mov.s $f0, $f12 + ; M2: c.ueq.s $f0, $f14 + ; M3: c.ueq.s $f0, $f13 ; M2: bc1f [[BB0:\$BB[0-9_]+]] ; M3: bc1f [[BB0:\.LBB[0-9_]+]] ; M2-M3: nop - ; M2: mov.s $f12, $f14 - ; M3: mov.s $f12, $f13 + ; M2: mov.s $f0, $f14 + ; M3: mov.s $f0, $f13 ; M2-M3: [[BB0]]: ; M2-M3: jr $ra - ; M2-M3: mov.s $f0, $f12 + ; M2-M3: nop - ; CMOV-32: c.ueq.s $f12, $f14 - ; CMOV-32: movf.s $f14, $f12, $fcc0 ; CMOV-32: mov.s $f0, $f14 + ; CMOV-32: c.ueq.s $f12, $f0 + ; CMOV-32: movf.s $f0, $f12, $fcc0 ; SEL-32: cmp.ueq.s $f0, $f12, $f14 ; SEL-32: mfc1 $[[T0:[0-9]+]], $f0 @@ -323,9 +329,9 @@ ; SEL-32: mtc1 $[[T0:[0-9]+]], 
$f0 ; SEL-32: sel.s $f0, $f14, $f12 - ; CMOV-64: c.ueq.s $f12, $f13 - ; CMOV-64: movf.s $f13, $f12, $fcc0 ; CMOV-64: mov.s $f0, $f13 + ; CMOV-64: c.ueq.s $f12, $f0 + ; CMOV-64: movf.s $f0, $f12, $fcc0 ; SEL-64: cmp.ueq.s $f0, $f12, $f13 ; SEL-64: mfc1 $[[T0:[0-9]+]], $f0 @@ -333,9 +339,9 @@ ; SEL-64: mtc1 $[[T0:[0-9]+]], $f0 ; SEL-64: sel.s $f0, $f13, $f12 - ; MM32R3: c.ueq.s $f12, $f14 - ; MM32R3: movf.s $f14, $f12, $fcc0 ; MM32R3: mov.s $f0, $f14 + ; MM32R3: c.ueq.s $f12, $f0 + ; MM32R3: movf.s $f0, $f12, $fcc0 %s = fcmp one float %x, %y %r = select i1 %s, float %x, float %y Index: test/CodeGen/Mips/o32_cc_byval.ll =================================================================== --- test/CodeGen/Mips/o32_cc_byval.ll +++ test/CodeGen/Mips/o32_cc_byval.ll @@ -97,14 +97,14 @@ define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind { entry: ; CHECK: addiu $sp, $sp, -48 -; CHECK-DAG: sw $7, 60($sp) +; CHECK: move $4, $7 +; CHECK-DAG: sw $4, 60($sp) ; CHECK-DAG: sw $6, 56($sp) ; CHECK-DAG: sw $5, 52($sp) ; CHECK-DAG: lw $[[R1:[0-9]+]], 80($sp) ; CHECK-DAG: lb $[[R0:[0-9]+]], 52($sp) ; CHECK-DAG: sw $[[R0]], 32($sp) ; CHECK-DAG: sw $[[R1]], 24($sp) -; CHECK: move $4, $7 %i = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 2 %tmp = load i32, i32* %i, align 4 Index: test/CodeGen/Mips/select.ll =================================================================== --- test/CodeGen/Mips/select.ll +++ test/CodeGen/Mips/select.ll @@ -147,11 +147,11 @@ ; 32R6: mtc1 $[[T0]], $[[CC:f0]] ; 32R6: sel.s $[[CC]], $[[F1]], $[[F0]] -; 64: movn.s $f14, $f13, $4 ; 64: mov.s $f0, $f14 +; 64: movn.s $f0, $f13, $4 -; 64R2: movn.s $f14, $f13, $4 ; 64R2: mov.s $f0, $f14 +; 64R2: movn.s $f0, $f13, $4 ; 64R6: sltu $[[T0:[0-9]+]], $zero, $4 ; 64R6: mtc1 $[[T0]], $[[CC:f0]] @@ -183,11 +183,11 @@ ; 32R6-DAG: ldc1 $[[F1:f[0-9]+]], 16($sp) ; 32R6: sel.d $[[CC]], $[[F1]], $[[F0]] -; 64: movn.d $f14, $f13, $4 ; 64: mov.d $f0, $f14 +; 64: 
movn.d $f0, $f13, $4 -; 64R2: movn.d $f14, $f13, $4 ; 64R2: mov.d $f0, $f14 +; 64R2: movn.d $f0, $f13, $4 ; 64R6-DAG: sltu $[[T0:[0-9]+]], $zero, $4 ; 64R6-DAG: mtc1 $[[T0]], $[[CC:f0]] @@ -202,30 +202,30 @@ entry: ; ALL-LABEL: f32_fcmp_oeq_f32_val: +; 32: mov.s $f0, $f14 ; 32-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32: c.eq.s $[[F2]], $[[F3]] -; 32: movt.s $f14, $f12, $fcc0 -; 32: mov.s $f0, $f14 +; 32: movt.s $f0, $f12, $fcc0 +; 32R2: mov.s $f0, $f14 ; 32R2-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R2-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R2: c.eq.s $[[F2]], $[[F3]] -; 32R2: movt.s $f14, $f12, $fcc0 -; 32R2: mov.s $f0, $f14 +; 32R2: movt.s $f0, $f12, $fcc0 ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R6: cmp.eq.s $[[CC:f0]], $[[F2]], $[[F3]] ; 32R6: sel.s $[[CC]], $f14, $f12 -; 64: c.eq.s $f14, $f15 -; 64: movt.s $f13, $f12, $fcc0 ; 64: mov.s $f0, $f13 +; 64: c.eq.s $f14, $f15 +; 64: movt.s $f0, $f12, $fcc0 -; 64R2: c.eq.s $f14, $f15 -; 64R2: movt.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 +; 64R2: c.eq.s $f14, $f15 +; 64R2: movt.s $f0, $f12, $fcc0 ; 64R6: cmp.eq.s $[[CC:f0]], $f14, $f15 ; 64R6: sel.s $[[CC]], $f13, $f12 @@ -239,30 +239,30 @@ entry: ; ALL-LABEL: f32_fcmp_olt_f32_val: +; 32: mov.s $f0, $f14 ; 32-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32: c.olt.s $[[F2]], $[[F3]] -; 32: movt.s $f14, $f12, $fcc0 -; 32: mov.s $f0, $f14 +; 32: movt.s $f0, $f12, $fcc0 +; 32R2: mov.s $f0, $f14 ; 32R2-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R2-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R2: c.olt.s $[[F2]], $[[F3]] -; 32R2: movt.s $f14, $f12, $fcc0 -; 32R2: mov.s $f0, $f14 +; 32R2: movt.s $f0, $f12, $fcc0 ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R6: cmp.lt.s $[[CC:f0]], $[[F2]], $[[F3]] ; 32R6: sel.s $[[CC]], $f14, $f12 -; 64: c.olt.s $f14, $f15 -; 64: movt.s $f13, $f12, $fcc0 ; 64: mov.s $f0, $f13 +; 64: c.olt.s $f14, $f15 +; 64: movt.s $f0, $f12, $fcc0 -; 64R2: c.olt.s 
$f14, $f15 -; 64R2: movt.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 +; 64R2: c.olt.s $f14, $f15 +; 64R2: movt.s $f0, $f12, $fcc0 ; 64R6: cmp.lt.s $[[CC:f0]], $f14, $f15 ; 64R6: sel.s $[[CC]], $f13, $f12 @@ -276,30 +276,30 @@ entry: ; ALL-LABEL: f32_fcmp_ogt_f32_val: +; 32: mov.s $f0, $f14 ; 32-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32: c.ule.s $[[F2]], $[[F3]] -; 32: movf.s $f14, $f12, $fcc0 -; 32: mov.s $f0, $f14 +; 32: movf.s $f0, $f12, $fcc0 +; 32R2: mov.s $f0, $f14 ; 32R2-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R2-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R2: c.ule.s $[[F2]], $[[F3]] -; 32R2: movf.s $f14, $f12, $fcc0 -; 32R2: mov.s $f0, $f14 +; 32R2: movf.s $f0, $f12, $fcc0 ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R6: cmp.lt.s $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.s $[[CC]], $f14, $f12 -; 64: c.ule.s $f14, $f15 -; 64: movf.s $f13, $f12, $fcc0 ; 64: mov.s $f0, $f13 +; 64: c.ule.s $f14, $f15 +; 64: movf.s $f0, $f12, $fcc0 -; 64R2: c.ule.s $f14, $f15 -; 64R2: movf.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 +; 64R2: c.ule.s $f14, $f15 +; 64R2: movf.s $f0, $f12, $fcc0 ; 64R6: cmp.lt.s $[[CC:f0]], $f15, $f14 ; 64R6: sel.s $[[CC]], $f13, $f12 @@ -313,30 +313,30 @@ entry: ; ALL-LABEL: f32_fcmp_ogt_f64_val: +; 32: mov.d $f0, $f14 ; 32-DAG: lwc1 $[[F2:f[0-9]+]], 16($sp) ; 32-DAG: lwc1 $[[F3:f[0-9]+]], 20($sp) ; 32: c.ule.s $[[F2]], $[[F3]] -; 32: movf.d $f14, $f12, $fcc0 -; 32: mov.d $f0, $f14 +; 32: movf.d $f0, $f12, $fcc0 +; 32R2: mov.d $f0, $f14 ; 32R2-DAG: lwc1 $[[F2:f[0-9]+]], 16($sp) ; 32R2-DAG: lwc1 $[[F3:f[0-9]+]], 20($sp) ; 32R2: c.ule.s $[[F2]], $[[F3]] -; 32R2: movf.d $f14, $f12, $fcc0 -; 32R2: mov.d $f0, $f14 +; 32R2: movf.d $f0, $f12, $fcc0 ; 32R6-DAG: lwc1 $[[F2:f[0-9]+]], 16($sp) ; 32R6-DAG: lwc1 $[[F3:f[0-9]+]], 20($sp) ; 32R6: cmp.lt.s $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.d $[[CC]], $f14, $f12 -; 64: c.ule.s $f14, $f15 -; 64: movf.d $f13, $f12, $fcc0 ; 64: mov.d $f0, $f13 +; 64: c.ule.s $f14, 
$f15 +; 64: movf.d $f0, $f12, $fcc0 -; 64R2: c.ule.s $f14, $f15 -; 64R2: movf.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 +; 64R2: c.ule.s $f14, $f15 +; 64R2: movf.d $f0, $f12, $fcc0 ; 64R6: cmp.lt.s $[[CC:f0]], $f15, $f14 ; 64R6: sel.d $[[CC]], $f13, $f12 @@ -350,30 +350,30 @@ entry: ; ALL-LABEL: f64_fcmp_oeq_f64_val: +; 32: mov.d $f0, $f14 ; 32-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32: c.eq.d $[[F2]], $[[F3]] -; 32: movt.d $f14, $f12, $fcc0 -; 32: mov.d $f0, $f14 +; 32: movt.d $f0, $f12, $fcc0 +; 32R2: mov.d $f0, $f14 ; 32R2-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R2-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R2: c.eq.d $[[F2]], $[[F3]] -; 32R2: movt.d $f14, $f12, $fcc0 -; 32R2: mov.d $f0, $f14 +; 32R2: movt.d $f0, $f12, $fcc0 ; 32R6-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R6-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R6: cmp.eq.d $[[CC:f0]], $[[F2]], $[[F3]] ; 32R6: sel.d $[[CC]], $f14, $f12 -; 64: c.eq.d $f14, $f15 -; 64: movt.d $f13, $f12, $fcc0 ; 64: mov.d $f0, $f13 +; 64: c.eq.d $f14, $f15 +; 64: movt.d $f0, $f12, $fcc0 -; 64R2: c.eq.d $f14, $f15 -; 64R2: movt.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 +; 64R2: c.eq.d $f14, $f15 +; 64R2: movt.d $f0, $f12, $fcc0 ; 64R6: cmp.eq.d $[[CC:f0]], $f14, $f15 ; 64R6: sel.d $[[CC]], $f13, $f12 @@ -387,30 +387,30 @@ entry: ; ALL-LABEL: f64_fcmp_olt_f64_val: +; 32: mov.d $f0, $f14 ; 32-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32: c.olt.d $[[F2]], $[[F3]] -; 32: movt.d $f14, $f12, $fcc0 -; 32: mov.d $f0, $f14 +; 32: movt.d $f0, $f12, $fcc0 +; 32R2: mov.d $f0, $f14 ; 32R2-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R2-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R2: c.olt.d $[[F2]], $[[F3]] -; 32R2: movt.d $f14, $f12, $fcc0 -; 32R2: mov.d $f0, $f14 +; 32R2: movt.d $f0, $f12, $fcc0 ; 32R6-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R6-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R6: cmp.lt.d $[[CC:f0]], $[[F2]], $[[F3]] ; 32R6: sel.d $[[CC]], $f14, $f12 -; 64: c.olt.d $f14, $f15 
-; 64: movt.d $f13, $f12, $fcc0 ; 64: mov.d $f0, $f13 +; 64: c.olt.d $f14, $f15 +; 64: movt.d $f0, $f12, $fcc0 -; 64R2: c.olt.d $f14, $f15 -; 64R2: movt.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 +; 64R2: c.olt.d $f14, $f15 +; 64R2: movt.d $f0, $f12, $fcc0 ; 64R6: cmp.lt.d $[[CC:f0]], $f14, $f15 ; 64R6: sel.d $[[CC]], $f13, $f12 @@ -424,30 +424,30 @@ entry: ; ALL-LABEL: f64_fcmp_ogt_f64_val: +; 32: mov.d $f0, $f14 ; 32-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32: c.ule.d $[[F2]], $[[F3]] -; 32: movf.d $f14, $f12, $fcc0 -; 32: mov.d $f0, $f14 +; 32: movf.d $f0, $f12, $fcc0 +; 32R2: mov.d $f0, $f14 ; 32R2-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R2-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R2: c.ule.d $[[F2]], $[[F3]] -; 32R2: movf.d $f14, $f12, $fcc0 -; 32R2: mov.d $f0, $f14 +; 32R2: movf.d $f0, $f12, $fcc0 ; 32R6-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R6-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) ; 32R6: cmp.lt.d $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.d $[[CC]], $f14, $f12 -; 64: c.ule.d $f14, $f15 -; 64: movf.d $f13, $f12, $fcc0 ; 64: mov.d $f0, $f13 +; 64: c.ule.d $f14, $f15 +; 64: movf.d $f0, $f12, $fcc0 -; 64R2: c.ule.d $f14, $f15 -; 64R2: movf.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 +; 64R2: c.ule.d $f14, $f15 +; 64R2: movf.d $f0, $f12, $fcc0 ; 64R6: cmp.lt.d $[[CC:f0]], $f15, $f14 ; 64R6: sel.d $[[CC]], $f13, $f12 @@ -461,19 +461,19 @@ entry: ; ALL-LABEL: f64_fcmp_ogt_f32_val: +; 32: mov.s $f0, $f14 ; 32-DAG: mtc1 $6, $[[F2:f[1-3]*[02468]+]] ; 32-DAG: mtc1 $7, $[[F2H:f[1-3]*[13579]+]] ; 32-DAG: ldc1 $[[F3:f[0-9]+]], 16($sp) ; 32: c.ule.d $[[F2]], $[[F3]] -; 32: movf.s $f14, $f12, $fcc0 -; 32: mov.s $f0, $f14 +; 32: movf.s $f0, $f12, $fcc0 +; 32R2: mov.s $f0, $f14 ; 32R2-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R2-DAG: mthc1 $7, $[[F2]] ; 32R2-DAG: ldc1 $[[F3:f[0-9]+]], 16($sp) ; 32R2: c.ule.d $[[F2]], $[[F3]] -; 32R2: movf.s $f14, $f12, $fcc0 -; 32R2: mov.s $f0, $f14 +; 32R2: movf.s $f0, $f12, $fcc0 ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] 
; 32R6-DAG: mthc1 $7, $[[F2]] @@ -481,13 +481,13 @@ ; 32R6: cmp.lt.d $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.s $[[CC]], $f14, $f12 -; 64: c.ule.d $f14, $f15 -; 64: movf.s $f13, $f12, $fcc0 ; 64: mov.s $f0, $f13 +; 64: c.ule.d $f14, $f15 +; 64: movf.s $f0, $f12, $fcc0 -; 64R2: c.ule.d $f14, $f15 -; 64R2: movf.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 +; 64R2: c.ule.d $f14, $f15 +; 64R2: movf.s $f0, $f12, $fcc0 ; 64R6: cmp.lt.d $[[CC:f0]], $f15, $f14 ; 64R6: sel.s $[[CC]], $f13, $f12 Index: test/CodeGen/PowerPC/licm-tocReg.ll =================================================================== --- test/CodeGen/PowerPC/licm-tocReg.ll +++ test/CodeGen/PowerPC/licm-tocReg.ll @@ -68,19 +68,19 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: addis 4, 2, .LC0@toc@ha ; CHECK-NEXT: addis 5, 2, .LC1@toc@ha +; CHECK-NEXT: mr 12, 3 ; CHECK-NEXT: ld 4, .LC0@toc@l(4) ; CHECK-NEXT: ld 5, .LC1@toc@l(5) ; CHECK-NEXT: lwz 6, 0(4) ; CHECK-NEXT: lwz 5, 0(5) +; CHECK-NEXT: lwz 4, 0(4) ; CHECK-NEXT: cmpw 6, 5 -; CHECK-NEXT: lwz 5, 0(4) -; CHECK-NEXT: mr 4, 3 ; CHECK-NEXT: bgt 0, .LBB0_3 ; CHECK-NEXT: # BB#1: ; CHECK-NEXT: addis 3, 2, .LC0@toc@ha -; CHECK-NEXT: addis 6, 2, .LC1@toc@ha +; CHECK-NEXT: addis 5, 2, .LC1@toc@ha ; CHECK-NEXT: ld 3, .LC0@toc@l(3) -; CHECK-NEXT: ld 6, .LC1@toc@l(6) +; CHECK-NEXT: ld 5, .LC1@toc@l(5) ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_2: # %if.end ; CHECK-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha Index: test/CodeGen/PowerPC/load-two-flts.ll =================================================================== --- test/CodeGen/PowerPC/load-two-flts.ll +++ test/CodeGen/PowerPC/load-two-flts.ll @@ -53,8 +53,8 @@ ; CHECK-NOT: ldu {{[0-9]+}}, 8(5) ; CHECK-NOT: stw ; CHECK-NOT: rldicl -; CHECK-DAG: lfsu {{[0-9]+}}, 8(5) -; CHECK-DAG: lfs {{[0-9]+}}, 4(5) +; CHECK-DAG: lfsu {{[0-9]+}}, 8(3) +; CHECK-DAG: lfs {{[0-9]+}}, 4(3) ; CHECK: blr } Index: test/CodeGen/PowerPC/ppc64-byval-align.ll =================================================================== --- 
test/CodeGen/PowerPC/ppc64-byval-align.ll +++ test/CodeGen/PowerPC/ppc64-byval-align.ll @@ -24,8 +24,7 @@ ret void } ; CHECK-LABEL: @caller1 -; CHECK: mr [[REG:[0-9]+]], 3 -; CHECK: mr 7, [[REG]] +; CHECK: mr 7, 3 ; CHECK: bl test1 define i64 @callee2(%struct.pad* byval nocapture readnone %x, i32 signext %y, %struct.test* byval align 16 nocapture readonly %z) { Index: test/CodeGen/PowerPC/select-i1-vs-i1.ll =================================================================== --- test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -477,11 +477,10 @@ ; CHECK-LABEL: @testfloatslt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -496,11 +495,10 @@ ; CHECK-LABEL: @testfloatult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -515,11 +513,10 @@ ; CHECK-LABEL: @testfloatsle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -534,11 +531,10 @@ ; CHECK-LABEL: @testfloatule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], 
.LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -553,11 +549,10 @@ ; CHECK-LABEL: @testfloateq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -572,11 +567,10 @@ ; CHECK-LABEL: @testfloatsge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -591,11 +585,10 @@ ; CHECK-LABEL: @testfloatuge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -610,11 +603,10 @@ ; CHECK-LABEL: @testfloatsgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -629,11 +621,10 @@ ; CHECK-LABEL: @testfloatugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} 
-; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -648,11 +639,10 @@ ; CHECK-LABEL: @testfloatne ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -667,11 +657,10 @@ ; CHECK-LABEL: @testdoubleslt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -686,11 +675,10 @@ ; CHECK-LABEL: @testdoubleult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -705,11 +693,10 @@ ; CHECK-LABEL: @testdoublesle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -724,11 +711,10 @@ ; CHECK-LABEL: @testdoubleule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc 
[[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -743,11 +729,10 @@ ; CHECK-LABEL: @testdoubleeq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -762,11 +747,10 @@ ; CHECK-LABEL: @testdoublesge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -781,11 +765,10 @@ ; CHECK-LABEL: @testdoubleuge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -800,11 +783,10 @@ ; CHECK-LABEL: @testdoublesgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -819,11 +801,10 @@ ; CHECK-LABEL: @testdoubleugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu 
{{[0-9]+}}, 1, 2 -; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -838,11 +819,10 @@ ; CHECK-LABEL: @testdoublene ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: fmr 5, 6 -; CHECK: .LBB[[BB]]: ; CHECK: fmr 1, 5 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: fmr 1, 6 ; CHECK: blr } @@ -1231,12 +1211,11 @@ ; CHECK-LABEL: @testqv4doubleslt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1250,12 +1229,11 @@ ; CHECK-LABEL: @testqv4doubleult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1269,12 +1247,11 @@ ; CHECK-LABEL: @testqv4doublesle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1288,12 +1265,11 @@ ; CHECK-LABEL: @testqv4doubleule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: 
fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1307,12 +1283,11 @@ ; CHECK-LABEL: @testqv4doubleeq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1326,12 +1301,11 @@ ; CHECK-LABEL: @testqv4doublesge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1345,12 +1319,11 @@ ; CHECK-LABEL: @testqv4doubleuge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1364,12 +1337,11 @@ ; CHECK-LABEL: @testqv4doublesgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1383,12 +1355,11 @@ ; CHECK-LABEL: @testqv4doubleugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, 
{{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1402,12 +1373,11 @@ ; CHECK-LABEL: @testqv4doublene ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1421,12 +1391,11 @@ ; CHECK-LABEL: @testqv4floatslt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1440,12 +1409,11 @@ ; CHECK-LABEL: @testqv4floatult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1459,12 +1427,11 @@ ; CHECK-LABEL: @testqv4floatsle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1478,12 +1445,11 @@ ; CHECK-LABEL: @testqv4floatule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 
5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1497,12 +1463,11 @@ ; CHECK-LABEL: @testqv4floateq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1516,12 +1481,11 @@ ; CHECK-LABEL: @testqv4floatsge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1535,12 +1499,11 @@ ; CHECK-LABEL: @testqv4floatuge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1554,12 +1517,11 @@ ; CHECK-LABEL: @testqv4floatsgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1573,12 +1535,11 @@ ; CHECK-LABEL: @testqv4floatugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, 
[[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1592,12 +1553,11 @@ ; CHECK-LABEL: @testqv4floatne ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1611,12 +1571,11 @@ ; CHECK-LABEL: @testqv4i1slt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1630,12 +1589,11 @@ ; CHECK-LABEL: @testqv4i1ult ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1649,12 +1607,11 @@ ; CHECK-LABEL: @testqv4i1sle ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1668,12 +1625,11 @@ ; CHECK-LABEL: @testqv4i1ule ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1687,12 +1643,11 @@ ; 
CHECK-LABEL: @testqv4i1eq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1706,12 +1661,11 @@ ; CHECK-LABEL: @testqv4i1sge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1725,12 +1679,11 @@ ; CHECK-LABEL: @testqv4i1uge ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1744,12 +1697,11 @@ ; CHECK-LABEL: @testqv4i1sgt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1763,12 +1715,11 @@ ; CHECK-LABEL: @testqv4i1ugt ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } @@ -1782,12 +1733,11 @@ ; CHECK-LABEL: @testqv4i1ne ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 +; CHECK-DAG: qvfmr 1, 5 ; 
CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: qvfmr 5, 6 -; CHECK: .LBB[[BB]]: -; CHECK: qvfmr 1, 5 +; CHECK: bclr 12, [[REG1]], 0 +; CHECK: qvfmr 1, 6 ; CHECK: blr } Index: test/CodeGen/SPARC/32abi.ll =================================================================== --- test/CodeGen/SPARC/32abi.ll +++ test/CodeGen/SPARC/32abi.ll @@ -88,36 +88,28 @@ ; SOFT-NEXT: mov %i2, %o0 ; SOFT-NEXT: call __extendsfdf2 ; SOFT-NEXT: nop -; SOFT-NEXT: mov %o0, %i2 -; SOFT-NEXT: mov %o1, %g2 +; SOFT-NEXT: mov %o0, %o2 +; SOFT-NEXT: mov %o1, %o3 ; SOFT-NEXT: mov %i0, %o0 ; SOFT-NEXT: mov %i1, %o1 -; SOFT-NEXT: mov %i2, %o2 -; SOFT-NEXT: mov %g2, %o3 ; SOFT-NEXT: call __adddf3 ; SOFT-NEXT: nop -; SOFT-NEXT: mov %o0, %i0 -; SOFT-NEXT: mov %o1, %i1 +; SOFT-NEXT: mov %o0, %o2 +; SOFT-NEXT: mov %o1, %o3 ; SOFT-NEXT: mov %i3, %o0 ; SOFT-NEXT: mov %i4, %o1 -; SOFT-NEXT: mov %i0, %o2 -; SOFT-NEXT: mov %i1, %o3 ; SOFT-NEXT: call __adddf3 ; SOFT-NEXT: nop -; SOFT-NEXT: mov %o0, %i0 -; SOFT-NEXT: mov %o1, %i1 +; SOFT-NEXT: mov %o0, %o2 +; SOFT-NEXT: mov %o1, %o3 ; SOFT-NEXT: mov %i5, %o0 ; SOFT-NEXT: mov %l3, %o1 -; SOFT-NEXT: mov %i0, %o2 -; SOFT-NEXT: mov %i1, %o3 ; SOFT-NEXT: call __adddf3 ; SOFT-NEXT: nop -; SOFT-NEXT: mov %o0, %i0 -; SOFT-NEXT: mov %o1, %i1 +; SOFT-NEXT: mov %o0, %o2 +; SOFT-NEXT: mov %o1, %o3 ; SOFT-NEXT: mov %l1, %o0 ; SOFT-NEXT: mov %l2, %o1 -; SOFT-NEXT: mov %i0, %o2 -; SOFT-NEXT: mov %i1, %o3 ; SOFT-NEXT: call __adddf3 ; SOFT-NEXT: nop ; SOFT-NEXT: mov %o0, %i0 Index: test/CodeGen/SPARC/64abi.ll =================================================================== --- test/CodeGen/SPARC/64abi.ll +++ test/CodeGen/SPARC/64abi.ll @@ -65,7 +65,7 @@ ; SOFT: save %sp, -176, %sp ; SOFT: srl %i0, 0, %o0 ; SOFT-NEXT: call __extendsfdf2 -; SOFT: mov %o0, %i0 +; SOFT: mov %o0, %o1 ; SOFT: mov %i1, %o0 ; SOFT: mov %i2, %o0 ; SOFT: mov %i3, %o0 @@ -145,13 +145,11 @@ ; HARD: fstod 
%f3 ; HARD: faddd %f6 ; HARD: faddd %f16 -; SOFT: mov %o0, %i1 +; SOFT: mov %o0, %o1 ; SOFT-NEXT: mov %i3, %o0 -; SOFT-NEXT: mov %i1, %o1 ; SOFT-NEXT: call __adddf3 -; SOFT: mov %o0, %i1 +; SOFT: mov %o0, %o1 ; SOFT-NEXT: mov %i0, %o0 -; SOFT-NEXT: mov %i1, %o1 ; SOFT-NEXT: call __adddf3 ; HARD: std %f0, [%i1] ; SOFT: stx %o0, [%i5] @@ -217,8 +215,8 @@ ; CHECK-LABEL: call_inreg_fi: ; Allocate space for 6 arguments, even when only 2 are used. ; CHECK: save %sp, -176, %sp -; HARD: sllx %i1, 32, %o0 -; HARD: fmovs %f5, %f1 +; HARD-DAG: sllx %i1, 32, %o0 +; HARD-DAG: fmovs %f5, %f1 ; SOFT: srl %i2, 0, %i0 ; SOFT: sllx %i1, 32, %i1 ; SOFT: or %i1, %i0, %o0 @@ -240,8 +238,8 @@ } ; CHECK-LABEL: call_inreg_ff: -; HARD: fmovs %f3, %f0 -; HARD: fmovs %f5, %f1 +; HARD-DAG: fmovs %f3, %f0 +; HARD-DAG: fmovs %f5, %f1 ; SOFT: srl %i2, 0, %i0 ; SOFT: sllx %i1, 32, %i1 ; SOFT: or %i1, %i0, %o0 @@ -527,9 +525,8 @@ ; CHECK: call sinf ; HARD: ld [%fp+[[Offset1]]], %f1 ; HARD: fmuls %f1, %f0, %f0 -; SOFT: mov %o0, %i0 +; SOFT: mov %o0, %o1 ; SOFT: mov %i1, %o0 -; SOFT: mov %i0, %o1 ; SOFT: call __mulsf3 ; SOFT: sllx %o0, 32, %i0 Index: test/CodeGen/SPARC/64cond.ll =================================================================== --- test/CodeGen/SPARC/64cond.ll +++ test/CodeGen/SPARC/64cond.ll @@ -67,9 +67,10 @@ } ; CHECK: selecti64_fcc +; CHECK: mov %i3, %i0 ; CHECK: fcmps %f1, %f3 -; CHECK: movul %fcc0, %i2, %i3 -; CHECK: restore %g0, %i3, %o0 +; CHECK: movul %fcc0, %i2, %i0 +; CHECK: restore define i64 @selecti64_fcc(float %x, float %y, i64 %a, i64 %b) { entry: %tobool = fcmp ult float %x, %y @@ -78,9 +79,9 @@ } ; CHECK: selectf32_xcc -; CHECK: cmp %i0, %i1 -; CHECK: fmovsg %xcc, %f5, %f7 ; CHECK: fmovs %f7, %f0 +; CHECK: cmp %i0, %i1 +; CHECK: fmovsg %xcc, %f5, %f0 define float @selectf32_xcc(i64 %x, i64 %y, float %a, float %b) { entry: %tobool = icmp sgt i64 %x, %y @@ -89,9 +90,9 @@ } ; CHECK: selectf64_xcc -; CHECK: cmp %i0, %i1 -; CHECK: fmovdg %xcc, %f4, %f6 ; CHECK: fmovd 
%f6, %f0 +; CHECK: cmp %i0, %i1 +; CHECK: fmovdg %xcc, %f4, %f0 define double @selectf64_xcc(i64 %x, i64 %y, double %a, double %b) { entry: %tobool = icmp sgt i64 %x, %y Index: test/CodeGen/SystemZ/call-03.ll =================================================================== --- test/CodeGen/SystemZ/call-03.ll +++ test/CodeGen/SystemZ/call-03.ll @@ -62,16 +62,13 @@ ; Check an indirect call. In this case the only acceptable choice for ; the target register is %r1. -; -; NOTE: the extra copy 'lgr %r1, %r0' is a coalescing failure. define void @f5(void(i32, i32, i32, i32) *%foo) { ; CHECK-LABEL: f5: -; CHECK: lgr %r0, %r2 +; CHECK: lgr %r1, %r2 ; CHECK-DAG: lhi %r2, 1 ; CHECK-DAG: lhi %r3, 2 ; CHECK-DAG: lhi %r4, 3 ; CHECK-DAG: lhi %r5, 4 -; CHECK: lgr %r1, %r0 ; CHECK: br %r1 tail call void %foo(i32 1, i32 2, i32 3, i32 4) ret void Index: test/CodeGen/SystemZ/swift-return.ll =================================================================== --- test/CodeGen/SystemZ/swift-return.ll +++ test/CodeGen/SystemZ/swift-return.ll @@ -39,9 +39,8 @@ ; in memroy. The caller provides space for the return value and passes ; the address in %r2. The first input argument will be in %r3. 
; CHECK-LABEL: test2: -; CHECK: lr %[[REG1:r[0-9]+]], %r2 +; CHECK: lr %r3, %r2 ; CHECK-DAG: la %r2, 160(%r15) -; CHECK-DAG: lr %r3, %[[REG1]] ; CHECK: brasl %r14, gen2 ; CHECK: l %r2, 160(%r15) ; CHECK: a %r2, 164(%r15) Index: test/CodeGen/SystemZ/swifterror.ll =================================================================== --- test/CodeGen/SystemZ/swifterror.ll +++ test/CodeGen/SystemZ/swifterror.ll @@ -34,11 +34,11 @@ ; CHECK: lgr %r[[REG1:[0-9]+]], %r2 ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo -; CHECK: cgijlh %r9, 0, +; CHECK: %r2, %r9 +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r[[REG2:[0-9]+]], 8(%r9) +; CHECK: lb %r[[REG2:[0-9]+]], 8(%r2) ; CHECK: stc %r[[REG2]], 0(%r[[REG1]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; CHECK-O0-LABEL: caller: ; CHECK-O0: lghi %r9, 0 @@ -246,11 +246,10 @@ ; CHECK: lhi %r3, 1 ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo_sret -; CHECK: cgijlh %r9, 0, +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r0, 8(%r9) +; CHECK: lb %r0, 8(%r2) ; CHECK: stc %r0, 0(%r[[REG1]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; CHECK-O0-LABEL: caller3: @@ -296,21 +295,21 @@ ; The first swifterror value: ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo -; CHECK: cgijlh %r9, 0, +; CHECK: ltgr %r2, %r9 +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r0, 8(%r9) +; CHECK: lb %r0, 8(%r2) ; CHECK: stc %r0, 0(%r[[REG1]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; The second swifterror value: ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo -; CHECK: cgijlh %r9, 0, +; CHECK: ltgr %r2, %r9 +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r0, 8(%r9) +; CHECK: lb %r0, 8(%r2) ; CHECK: stc %r0, 0(%r[[REG2]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; CHECK-O0-LABEL: caller_with_multiple_swifterror_values: Index: test/CodeGen/Thumb/long.ll 
=================================================================== --- test/CodeGen/Thumb/long.ll +++ test/CodeGen/Thumb/long.ll @@ -59,10 +59,10 @@ %tmp1 = add i64 %y, 10 ret i64 %tmp1 ; CHECK-LABEL: f6a: +; CHECK: movs r0, r2 ; CHECK: movs r1, #0 -; CHECK: adds r2, #10 +; CHECK: adds r0, #10 ; CHECK: adcs r1, r3 -; CHECK: movs r0, r2 } define i64 @f6b(i64 %x, i64 %y) { @@ -101,11 +101,11 @@ %tmp1 = sub i64 %y, 10 ret i64 %tmp1 ; CHECK-LABEL: f9a: -; CHECK: movs r0, #0 -; CHECK: subs r2, #10 -; CHECK: sbcs r3, r0 -; CHECK: movs r0, r2 ; CHECK: movs r1, r3 +; CHECK: movs r0, r2 +; CHECK: movs r2, #0 +; CHECK: subs r0, #10 +; CHECK: sbcs r1, r2 } define i64 @f9b(i64 %x, i64 %y) { ; ADDC with big negative imm => SUBS reg @@ -184,14 +184,14 @@ %tmp2 = add i64 %tmp1, -1000 ret i64 %tmp2 ; CHECK-LABEL: f11: +; CHECK: movs r1, r3 ; CHECK: movs r0, #125 ; CHECK: lsls r0, r0, #3 -; CHECK: movs r1, #0 +; CHECK: movs r3, #0 ; CHECK: subs r2, r2, r0 -; CHECK: sbcs r3, r1 +; CHECK: sbcs r1, r3 ; CHECK: subs r0, r2, r0 -; CHECK: sbcs r3, r1 -; CHECK: movs r1, r3 +; CHECK: sbcs r1, r3 } ; "sub 2147483648" has to be lowered into "add -2147483648" Index: test/CodeGen/Thumb2/aapcs.ll =================================================================== --- test/CodeGen/Thumb2/aapcs.ll +++ test/CodeGen/Thumb2/aapcs.ll @@ -14,8 +14,8 @@ define double @double_in_reg(double %a, double %b) { entry: ; CHECK-LABEL: double_in_reg: -; SOFT: mov r0, r2 ; SOFT: mov r1, r3 +; SOFT: mov r0, r2 ; SP: vmov.f32 s0, s2 ; SP: vmov.f32 s1, s3 ; DP: vmov.f64 d0, d1 Index: test/CodeGen/Thumb2/thumb2-select_xform.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-select_xform.ll +++ test/CodeGen/Thumb2/thumb2-select_xform.ll @@ -2,11 +2,11 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: t1 -; CHECK: mvn r0, #-2147483648 +; CHECK: mov r0, r1 +; CHECK: mvn r1, #-2147483648 ; CHECK: cmp r2, #10 ; CHECK: it le -; CHECK: addle r1, r0 -; CHECK: 
mov r0, r1 +; CHECK: addle r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 %tmp3 = add i32 %tmp2, %b @@ -15,10 +15,10 @@ define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: t2 +; CHECK: mov r0, r1 ; CHECK: cmp r2, #10 ; CHECK: it le -; CHECK: addle.w r1, r1, #-2147483648 -; CHECK: mov r0, r1 +; CHECK: addle.w r0, r0, #-2147483648 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483648 @@ -28,10 +28,10 @@ define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t3 +; CHECK: mov r0, r1 ; CHECK: cmp r2, #10 ; CHECK: it le -; CHECK: suble r1, #10 -; CHECK: mov r0, r1 +; CHECK: suble r0, #10 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10 %tmp3 = sub i32 %b, %tmp2 Index: test/CodeGen/X86/GlobalISel/add-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/add-scalar.ll +++ test/CodeGen/X86/GlobalISel/add-scalar.ll @@ -63,8 +63,9 @@ define i8 @test_add_i8(i8 %arg1, i8 %arg2) { ; X64-LABEL: test_add_i8: ; X64: # BB#0: -; X64-NEXT: addb %dil, %sil ; X64-NEXT: movl %esi, %eax +; X64-NEXT: addb %dil, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ; ; X32-LABEL: test_add_i8: Index: test/CodeGen/X86/GlobalISel/and-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/and-scalar.ll +++ test/CodeGen/X86/GlobalISel/and-scalar.ll @@ -18,8 +18,9 @@ define i8 @test_and_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_and_i8: ; ALL: # BB#0: -; ALL-NEXT: andb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andb %dil, %al +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq %ret = and i8 %arg1, %arg2 ret i8 %ret @@ -28,8 +29,9 @@ define i16 @test_and_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_and_i16: ; ALL: # BB#0: -; ALL-NEXT: andw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andw %di, %ax +; ALL-NEXT: # kill: %AX %AX %EAX ; ALL-NEXT: retq %ret = and i16 %arg1, %arg2 ret i16 
%ret @@ -38,8 +40,8 @@ define i32 @test_and_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_and_i32: ; ALL: # BB#0: -; ALL-NEXT: andl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andl %edi, %eax ; ALL-NEXT: retq %ret = and i32 %arg1, %arg2 ret i32 %ret @@ -48,8 +50,8 @@ define i64 @test_and_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_and_i64: ; ALL: # BB#0: -; ALL-NEXT: andq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: andq %rdi, %rax ; ALL-NEXT: retq %ret = and i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/GlobalISel/binop.ll =================================================================== --- test/CodeGen/X86/GlobalISel/binop.ll +++ test/CodeGen/X86/GlobalISel/binop.ll @@ -7,8 +7,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_sub_i64: ; ALL: # BB#0: -; ALL-NEXT: subq %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: subq %rsi, %rax ; ALL-NEXT: retq %ret = sub i64 %arg1, %arg2 ret i64 %ret @@ -17,8 +17,8 @@ define i32 @test_sub_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_sub_i32: ; ALL: # BB#0: -; ALL-NEXT: subl %esi, %edi ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: subl %esi, %eax ; ALL-NEXT: retq %ret = sub i32 %arg1, %arg2 ret i32 %ret Index: test/CodeGen/X86/GlobalISel/callingconv.ll =================================================================== --- test/CodeGen/X86/GlobalISel/callingconv.ll +++ test/CodeGen/X86/GlobalISel/callingconv.ll @@ -38,6 +38,7 @@ ; X64-LABEL: test_arg_i8: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ret i8 %a } @@ -51,6 +52,7 @@ ; X64-LABEL: test_arg_i16: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq ret i16 %a } @@ -114,8 +116,8 @@ ; X32: # BB#0: ; X32-NEXT: subl $12, %esp ; X32-NEXT: .cfi_def_cfa_offset 16 -; X32-NEXT: movups 16(%esp), %xmm1 -; X32-NEXT: movaps %xmm2, %xmm0 +; X32-DAG: movups 16(%esp), %xmm1 +; X32-DAG: movaps %xmm2, %xmm0 ; X32-NEXT: addl $12, %esp ; X32-NEXT: retl 
; @@ -248,8 +250,8 @@ ; X32-NEXT: .cfi_def_cfa_offset 48 ; X32-NEXT: movaps %xmm0, (%esp) # 16-byte Spill ; X32-NEXT: movaps %xmm1, 16(%esp) # 16-byte Spill -; X32-NEXT: movdqu 48(%esp), %xmm1 -; X32-NEXT: movdqa %xmm2, %xmm0 +; X32-DAG: movdqu 48(%esp), %xmm1 +; X32-DAG: movdqa %xmm2, %xmm0 ; X32-NEXT: calll split_return_callee ; X32-NEXT: paddd (%esp), %xmm0 # 16-byte Folded Reload ; X32-NEXT: paddd 16(%esp), %xmm1 # 16-byte Folded Reload Index: test/CodeGen/X86/GlobalISel/ext-x86-64.ll =================================================================== --- test/CodeGen/X86/GlobalISel/ext-x86-64.ll +++ test/CodeGen/X86/GlobalISel/ext-x86-64.ll @@ -6,9 +6,8 @@ define i64 @test_zext_i1(i8 %a) { ; X64-LABEL: test_zext_i1: ; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: andq $1, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andq $1, %rax ; X64-NEXT: retq %val = trunc i8 %a to i1 %r = zext i1 %val to i64 Index: test/CodeGen/X86/GlobalISel/ext.ll =================================================================== --- test/CodeGen/X86/GlobalISel/ext.ll +++ test/CodeGen/X86/GlobalISel/ext.ll @@ -5,8 +5,9 @@ define i8 @test_zext_i1toi8(i32 %a) { ; X64-LABEL: test_zext_i1toi8: ; X64: # BB#0: -; X64-NEXT: andb $1, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1toi8: @@ -23,8 +24,9 @@ define i16 @test_zext_i1toi16(i32 %a) { ; X64-LABEL: test_zext_i1toi16: ; X64: # BB#0: -; X64-NEXT: andw $1, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andw $1, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1toi16: @@ -41,8 +43,8 @@ define i32 @test_zext_i1(i32 %a) { ; X64-LABEL: test_zext_i1: ; X64: # BB#0: -; X64-NEXT: andl $1, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1: Index: test/CodeGen/X86/GlobalISel/memop-scalar.ll 
=================================================================== --- test/CodeGen/X86/GlobalISel/memop-scalar.ll +++ test/CodeGen/X86/GlobalISel/memop-scalar.ll @@ -82,9 +82,9 @@ define i1 * @test_store_i1(i1 %val, i1 * %p1) { ; ALL-LABEL: test_store_i1: ; ALL: # BB#0: -; ALL-NEXT: andb $1, %dil -; ALL-NEXT: movb %dil, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: andb $1, %dil +; ALL-NEXT: movb %dil, (%rax) ; ALL-NEXT: retq store i1 %val, i1* %p1 ret i1 * %p1; @@ -93,8 +93,8 @@ define i32 * @test_store_i32(i32 %val, i32 * %p1) { ; ALL-LABEL: test_store_i32: ; ALL: # BB#0: -; ALL-NEXT: movl %edi, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: movl %edi, (%rax) ; ALL-NEXT: retq store i32 %val, i32* %p1 ret i32 * %p1; @@ -103,8 +103,8 @@ define i64 * @test_store_i64(i64 %val, i64 * %p1) { ; ALL-LABEL: test_store_i64: ; ALL: # BB#0: -; ALL-NEXT: movq %rdi, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: movq %rdi, (%rax) ; ALL-NEXT: retq store i64 %val, i64* %p1 ret i64 * %p1; @@ -114,15 +114,15 @@ ; ; SSE_FAST-LABEL: test_store_float: ; SSE_FAST: # BB#0: -; SSE_FAST-NEXT: movd %xmm0, %eax -; SSE_FAST-NEXT: movl %eax, (%rdi) ; SSE_FAST-NEXT: movq %rdi, %rax +; SSE_FAST-NEXT: movd %xmm0, %ecx +; SSE_FAST-NEXT: movl %ecx, (%rax) ; SSE_FAST-NEXT: retq ; ; SSE_GREEDY-LABEL: test_store_float: ; SSE_GREEDY: # BB#0: -; SSE_GREEDY-NEXT: movss %xmm0, (%rdi) ; SSE_GREEDY-NEXT: movq %rdi, %rax +; SSE_GREEDY-NEXT: movss %xmm0, (%rax) ; SSE_GREEDY-NEXT: retq store float %val, float* %p1 ret float * %p1; @@ -132,15 +132,15 @@ ; ; SSE_FAST-LABEL: test_store_double: ; SSE_FAST: # BB#0: -; SSE_FAST-NEXT: movq %xmm0, %rax -; SSE_FAST-NEXT: movq %rax, (%rdi) ; SSE_FAST-NEXT: movq %rdi, %rax +; SSE_FAST-NEXT: movq %xmm0, %rcx +; SSE_FAST-NEXT: movq %rcx, (%rax) ; SSE_FAST-NEXT: retq ; ; SSE_GREEDY-LABEL: test_store_double: ; SSE_GREEDY: # BB#0: -; SSE_GREEDY-NEXT: movsd %xmm0, (%rdi) ; SSE_GREEDY-NEXT: movq %rdi, %rax +; SSE_GREEDY-NEXT: movsd %xmm0, (%rax) ; SSE_GREEDY-NEXT: retq 
store double %val, double* %p1 ret double * %p1; Index: test/CodeGen/X86/GlobalISel/mul-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/mul-scalar.ll +++ test/CodeGen/X86/GlobalISel/mul-scalar.ll @@ -10,8 +10,9 @@ define i16 @test_mul_i16(i16 %arg1, i16 %arg2) { ; X64-LABEL: test_mul_i16: ; X64: # BB#0: -; X64-NEXT: imulw %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: imulw %di, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %ret = mul i16 %arg1, %arg2 ret i16 %ret @@ -20,8 +21,8 @@ define i32 @test_mul_i32(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_mul_i32: ; X64: # BB#0: -; X64-NEXT: imull %edi, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: imull %edi, %eax ; X64-NEXT: retq %ret = mul i32 %arg1, %arg2 ret i32 %ret @@ -30,8 +31,8 @@ define i64 @test_mul_i64(i64 %arg1, i64 %arg2) { ; X64-LABEL: test_mul_i64: ; X64: # BB#0: -; X64-NEXT: imulq %rdi, %rsi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: retq %ret = mul i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/GlobalISel/or-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/or-scalar.ll +++ test/CodeGen/X86/GlobalISel/or-scalar.ll @@ -18,8 +18,9 @@ define i8 @test_or_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_or_i8: ; ALL: # BB#0: -; ALL-NEXT: orb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orb %dil, %al +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq %ret = or i8 %arg1, %arg2 ret i8 %ret @@ -28,8 +29,9 @@ define i16 @test_or_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_or_i16: ; ALL: # BB#0: -; ALL-NEXT: orw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orw %di, %ax +; ALL-NEXT: # kill: %AX %AX %EAX ; ALL-NEXT: retq %ret = or i16 %arg1, %arg2 ret i16 %ret @@ -38,8 +40,8 @@ define i32 @test_or_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_or_i32: ; ALL: # BB#0: -; ALL-NEXT: orl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orl %edi, 
%eax ; ALL-NEXT: retq %ret = or i32 %arg1, %arg2 ret i32 %ret @@ -48,8 +50,8 @@ define i64 @test_or_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_or_i64: ; ALL: # BB#0: -; ALL-NEXT: orq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: orq %rdi, %rax ; ALL-NEXT: retq %ret = or i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/GlobalISel/phi.ll =================================================================== --- test/CodeGen/X86/GlobalISel/phi.ll +++ test/CodeGen/X86/GlobalISel/phi.ll @@ -4,15 +4,16 @@ define i8 @test_i8(i32 %a, i8 %f, i8 %t) { ; ALL-LABEL: test_i8: ; ALL: # BB#0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB0_2 ; ALL-NEXT: # BB#1: # %cond.false -; ALL-NEXT: movl %edx, %esi +; ALL-NEXT: movl %edx, %eax ; ALL-NEXT: .LBB0_2: # %cond.end -; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -32,15 +33,16 @@ define i16 @test_i16(i32 %a, i16 %f, i16 %t) { ; ALL-LABEL: test_i16: ; ALL: # BB#0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB1_2 ; ALL-NEXT: # BB#1: # %cond.false -; ALL-NEXT: movl %edx, %esi +; ALL-NEXT: movl %edx, %eax ; ALL-NEXT: .LBB1_2: # %cond.end -; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: # kill: %AX %AX %EAX ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -60,15 +62,15 @@ define i32 @test_i32(i32 %a, i32 %f, i32 %t) { ; ALL-LABEL: test_i32: ; ALL: # BB#0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl 
%ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB2_2 ; ALL-NEXT: # BB#1: # %cond.false -; ALL-NEXT: movl %edx, %esi +; ALL-NEXT: movl %edx, %eax ; ALL-NEXT: .LBB2_2: # %cond.end -; ALL-NEXT: movl %esi, %eax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -88,15 +90,15 @@ define i64 @test_i64(i32 %a, i64 %f, i64 %t) { ; ALL-LABEL: test_i64: ; ALL: # BB#0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB3_2 ; ALL-NEXT: # BB#1: # %cond.false -; ALL-NEXT: movq %rdx, %rsi +; ALL-NEXT: movq %rdx, %rax ; ALL-NEXT: .LBB3_2: # %cond.end -; ALL-NEXT: movq %rsi, %rax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 Index: test/CodeGen/X86/GlobalISel/sub-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/sub-scalar.ll +++ test/CodeGen/X86/GlobalISel/sub-scalar.ll @@ -4,8 +4,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; X64-LABEL: test_sub_i64: ; X64: # BB#0: -; X64-NEXT: subq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: subq %rsi, %rax ; X64-NEXT: retq %ret = sub i64 %arg1, %arg2 ret i64 %ret @@ -14,8 +14,8 @@ define i32 @test_sub_i32(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_sub_i32: ; X64: # BB#0: -; X64-NEXT: subl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax ; X64-NEXT: retq %ret = sub i32 %arg1, %arg2 ret i32 %ret @@ -24,8 +24,9 @@ define i16 @test_sub_i16(i16 %arg1, i16 %arg2) { ; X64-LABEL: test_sub_i16: ; X64: # BB#0: -; X64-NEXT: subw %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subw %si, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %ret = sub i16 %arg1, %arg2 ret i16 %ret @@ -34,8 +35,9 @@ define i8 @test_sub_i8(i8 %arg1, i8 %arg2) { ; X64-LABEL: test_sub_i8: ; X64: # 
BB#0: -; X64-NEXT: subb %sil, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subb %sil, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %ret = sub i8 %arg1, %arg2 ret i8 %ret @@ -44,9 +46,9 @@ define i32 @test_sub_i1(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_sub_i1: ; X64: # BB#0: -; X64-NEXT: subb %sil, %dil -; X64-NEXT: andl $1, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subb %sil, %al +; X64-NEXT: andl $1, %eax ; X64-NEXT: retq %a1 = trunc i32 %arg1 to i1 %a2 = trunc i32 %arg2 to i1 Index: test/CodeGen/X86/GlobalISel/trunc.ll =================================================================== --- test/CodeGen/X86/GlobalISel/trunc.ll +++ test/CodeGen/X86/GlobalISel/trunc.ll @@ -5,6 +5,7 @@ ; CHECK-LABEL: trunc_i32toi1: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %r = trunc i32 %a to i1 ret i1 %r @@ -14,6 +15,7 @@ ; CHECK-LABEL: trunc_i32toi8: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %r = trunc i32 %a to i8 ret i8 %r @@ -23,6 +25,7 @@ ; CHECK-LABEL: trunc_i32toi16: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq %r = trunc i32 %a to i16 ret i16 %r @@ -31,7 +34,8 @@ define i8 @trunc_i64toi8(i64 %a) { ; CHECK-LABEL: trunc_i64toi8: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %AL %AL %RAX ; CHECK-NEXT: retq %r = trunc i64 %a to i8 ret i8 %r @@ -40,7 +44,8 @@ define i16 @trunc_i64toi16(i64 %a) { ; CHECK-LABEL: trunc_i64toi16: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %AX %AX %RAX ; CHECK-NEXT: retq %r = trunc i64 %a to i16 ret i16 %r @@ -49,7 +54,8 @@ define i32 @trunc_i64toi32(i64 %a) { ; CHECK-LABEL: trunc_i64toi32: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: retq %r = trunc i64 %a to 
i32 ret i32 %r Index: test/CodeGen/X86/GlobalISel/undef.ll =================================================================== --- test/CodeGen/X86/GlobalISel/undef.ll +++ test/CodeGen/X86/GlobalISel/undef.ll @@ -11,8 +11,9 @@ define i8 @test2(i8 %a) { ; ALL-LABEL: test2: ; ALL: # BB#0: -; ALL-NEXT: addb %al, %dil ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: addb %al, %al +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq %r = add i8 %a, undef ret i8 %r Index: test/CodeGen/X86/GlobalISel/xor-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/xor-scalar.ll +++ test/CodeGen/X86/GlobalISel/xor-scalar.ll @@ -18,8 +18,9 @@ define i8 @test_xor_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_xor_i8: ; ALL: # BB#0: -; ALL-NEXT: xorb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorb %dil, %al +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq %ret = xor i8 %arg1, %arg2 ret i8 %ret @@ -28,8 +29,9 @@ define i16 @test_xor_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_xor_i16: ; ALL: # BB#0: -; ALL-NEXT: xorw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorw %di, %ax +; ALL-NEXT: # kill: %AX %AX %EAX ; ALL-NEXT: retq %ret = xor i16 %arg1, %arg2 ret i16 %ret @@ -38,8 +40,8 @@ define i32 @test_xor_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_xor_i32: ; ALL: # BB#0: -; ALL-NEXT: xorl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %edi, %eax ; ALL-NEXT: retq %ret = xor i32 %arg1, %arg2 ret i32 %ret @@ -48,8 +50,8 @@ define i64 @test_xor_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_xor_i64: ; ALL: # BB#0: -; ALL-NEXT: xorq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: xorq %rdi, %rax ; ALL-NEXT: retq %ret = xor i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/add.ll =================================================================== --- test/CodeGen/X86/add.ll +++ test/CodeGen/X86/add.ll @@ -16,14 +16,14 @@ ; ; X64-LINUX-LABEL: test1: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: subl $-128, %edi 
; X64-LINUX-NEXT: movl %edi, %eax +; X64-LINUX-NEXT: subl $-128, %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test1: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: subl $-128, %ecx ; X64-WIN32-NEXT: movl %ecx, %eax +; X64-WIN32-NEXT: subl $-128, %eax ; X64-WIN32-NEXT: retq entry: %b = add i32 %a, 128 @@ -38,14 +38,14 @@ ; ; X64-LINUX-LABEL: test2: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: subq $-2147483648, %rdi # imm = 0x80000000 ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: subq $-2147483648, %rax # imm = 0x80000000 ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test2: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: subq $-2147483648, %rcx # imm = 0x80000000 ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: subq $-2147483648, %rax # imm = 0x80000000 ; X64-WIN32-NEXT: retq entry: %b = add i64 %a, 2147483648 @@ -60,14 +60,14 @@ ; ; X64-LINUX-LABEL: test3: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: subq $-128, %rdi ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: subq $-128, %rax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test3: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: subq $-128, %rcx ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: subq $-128, %rax ; X64-WIN32-NEXT: retq entry: %b = add i64 %a, 128 @@ -204,16 +204,16 @@ ; ; X64-LINUX-LABEL: test7: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: addl %esi, %edi -; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: movl %edi, %eax +; X64-LINUX-NEXT: addl %esi, %eax +; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test7: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: addl %edx, %ecx -; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: movl %ecx, %eax +; X64-WIN32-NEXT: addl %edx, %eax +; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: retq entry: %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) @@ -233,16 +233,16 @@ ; ; X64-LINUX-LABEL: test8: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: addq %rsi, %rdi -; X64-LINUX-NEXT: setb %dl ; 
X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: addq %rsi, %rax +; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test8: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: addq %rdx, %rcx -; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: addq %rdx, %rax +; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: retq entry: %extleft = zext i64 %left to i65 @@ -268,20 +268,20 @@ ; ; X64-LINUX-LABEL: test9: ; X64-LINUX: # BB#0: # %entry -; X64-LINUX-NEXT: xorl %eax, %eax -; X64-LINUX-NEXT: cmpl $10, %edi -; X64-LINUX-NEXT: sete %al -; X64-LINUX-NEXT: subl %eax, %esi ; X64-LINUX-NEXT: movl %esi, %eax +; X64-LINUX-NEXT: xorl %ecx, %ecx +; X64-LINUX-NEXT: cmpl $10, %edi +; X64-LINUX-NEXT: sete %cl +; X64-LINUX-NEXT: subl %ecx, %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test9: ; X64-WIN32: # BB#0: # %entry -; X64-WIN32-NEXT: xorl %eax, %eax -; X64-WIN32-NEXT: cmpl $10, %ecx -; X64-WIN32-NEXT: sete %al -; X64-WIN32-NEXT: subl %eax, %edx ; X64-WIN32-NEXT: movl %edx, %eax +; X64-WIN32-NEXT: xorl %edx, %edx +; X64-WIN32-NEXT: cmpl $10, %ecx +; X64-WIN32-NEXT: sete %dl +; X64-WIN32-NEXT: subl %edx, %eax ; X64-WIN32-NEXT: retq entry: %cmp = icmp eq i32 %x, 10 Index: test/CodeGen/X86/addcarry.ll =================================================================== --- test/CodeGen/X86/addcarry.ll +++ test/CodeGen/X86/addcarry.ll @@ -110,15 +110,15 @@ define %scalar @pr31719(%scalar* nocapture readonly %this, %scalar %arg.b) { ; CHECK-LABEL: pr31719: ; CHECK: # BB#0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: addq (%rsi), %rdx ; CHECK-NEXT: adcq 8(%rsi), %rcx ; CHECK-NEXT: adcq 16(%rsi), %r8 ; CHECK-NEXT: adcq 24(%rsi), %r9 -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %r8, 16(%rdi) -; CHECK-NEXT: movq %r9, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %rcx, 8(%rax) +; CHECK-NEXT: movq %r8, 16(%rax) +; CHECK-NEXT: movq %r9, 24(%rax) ; 
CHECK-NEXT: retq entry: %0 = extractvalue %scalar %arg.b, 0 @@ -206,9 +206,9 @@ define i64 @shiftadd(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: shiftadd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: adcq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: adcq %rcx, %rax ; CHECK-NEXT: retq entry: %0 = zext i64 %a to i128 @@ -226,23 +226,23 @@ define %S @readd(%S* nocapture readonly %this, %S %arg.b) { ; CHECK-LABEL: readd: ; CHECK: # BB#0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: movq 8(%rsi), %r10 -; CHECK-NEXT: adcq $0, %r10 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: addq %rcx, %r10 -; CHECK-NEXT: adcq 16(%rsi), %rax +; CHECK-NEXT: movq 8(%rsi), %r11 +; CHECK-NEXT: adcq $0, %r11 +; CHECK-NEXT: setb %r10b +; CHECK-NEXT: movzbl %r10b, %edi +; CHECK-NEXT: addq %rcx, %r11 +; CHECK-NEXT: adcq 16(%rsi), %rdi ; CHECK-NEXT: setb %cl ; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: addq %r8, %rdi ; CHECK-NEXT: adcq 24(%rsi), %rcx ; CHECK-NEXT: addq %r9, %rcx -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %r10, 8(%rdi) -; CHECK-NEXT: movq %rax, 16(%rdi) -; CHECK-NEXT: movq %rcx, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %r11, 8(%rax) +; CHECK-NEXT: movq %rdi, 16(%rax) +; CHECK-NEXT: movq %rcx, 24(%rax) ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 Index: test/CodeGen/X86/andimm8.ll =================================================================== --- test/CodeGen/X86/andimm8.ll +++ test/CodeGen/X86/andimm8.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s ; PR8365 -; CHECK: andl $-64, %edi # encoding: [0x83,0xe7,0xc0] +; CHECK: andl $-64, %eax # encoding: [0x83,0xe0,0xc0] define i64 @bra(i32 %zed) nounwind { %t1 = zext i32 %zed to i64 @@ -19,13 +19,13 @@ } define i64 @bar(i64 %zed) nounwind { 
-; CHECK: andl $42, %edi # encoding: [0x83,0xe7,0x2a] +; CHECK: andl $42, %eax # encoding: [0x83,0xe0,0x2a] %t1 = and i64 %zed, 42 ret i64 %t1 } define i64 @baz(i64 %zed) nounwind { -; CHECK: andl $2147483647, %edi # encoding: [0x81,0xe7,0xff,0xff,0xff,0x7f] +; CHECK: andl $2147483647, %eax # encoding: [0x25,0xff,0xff,0xff,0x7f] %t1 = and i64 %zed, 2147483647 ret i64 %t1 } Index: test/CodeGen/X86/anyext.ll =================================================================== --- test/CodeGen/X86/anyext.ll +++ test/CodeGen/X86/anyext.ll @@ -41,8 +41,9 @@ ; ; X64-LABEL: bar: ; X64: # BB#0: -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: divw %si ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: andl $1, %eax Index: test/CodeGen/X86/apm.ll =================================================================== --- test/CodeGen/X86/apm.ll +++ test/CodeGen/X86/apm.ll @@ -3,8 +3,8 @@ ; PR8573 ; CHECK-LABEL: foo: -; CHECK: leaq (%rdi), %rax -; CHECK-NEXT: movl %esi, %ecx +; CHECK: movl %esi, %ecx +; CHECK-NEXT: leaq (%rdi), %rax ; CHECK-NEXT: monitor ; WIN64-LABEL: foo: ; WIN64: leaq (%rcx), %rax @@ -20,8 +20,8 @@ declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind ; CHECK-LABEL: bar: -; CHECK: movl %edi, %ecx -; CHECK-NEXT: movl %esi, %eax +; CHECK: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: mwait ; WIN64-LABEL: bar: ; WIN64: movl %edx, %eax Index: test/CodeGen/X86/atomic-eflags-reuse.ll =================================================================== --- test/CodeGen/X86/atomic-eflags-reuse.ll +++ test/CodeGen/X86/atomic-eflags-reuse.ll @@ -4,9 +4,9 @@ define i32 @test_add_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_1_cmov_slt: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock incq (%rdi) -; CHECK-NEXT: cmovgl %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: lock incq (%rdi) +; CHECK-NEXT: cmovgl %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = 
atomicrmw add i64* %p, i64 1 seq_cst @@ -18,9 +18,9 @@ define i32 @test_add_1_cmov_sge(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_1_cmov_sge: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock incq (%rdi) -; CHECK-NEXT: cmovlel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: lock incq (%rdi) +; CHECK-NEXT: cmovlel %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -32,9 +32,9 @@ define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_sub_1_cmov_sle: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock decq (%rdi) -; CHECK-NEXT: cmovgel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: lock decq (%rdi) +; CHECK-NEXT: cmovgel %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst @@ -46,9 +46,9 @@ define i32 @test_sub_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_sub_1_cmov_sgt: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock decq (%rdi) -; CHECK-NEXT: cmovll %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: lock decq (%rdi) +; CHECK-NEXT: cmovll %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst @@ -113,11 +113,11 @@ define i32 @test_add_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_1_cmov_sle: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: cmovgl %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: lock xaddq %rcx, (%rdi) +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: cmovgl %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -129,11 +129,11 @@ define i32 @test_add_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_1_cmov_sgt: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: cmovlel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; 
CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: lock xaddq %rcx, (%rdi) +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: cmovlel %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst Index: test/CodeGen/X86/atomic128.ll =================================================================== --- test/CodeGen/X86/atomic128.ll +++ test/CodeGen/X86/atomic128.ll @@ -12,10 +12,9 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rcx, %r9 +; CHECK-NEXT: movq %rcx, %rbx ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: movq %r9, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq Index: test/CodeGen/X86/avg.ll =================================================================== --- test/CodeGen/X86/avg.ll +++ test/CodeGen/X86/avg.ll @@ -270,180 +270,180 @@ ; SSE2: # BB#0: ; SSE2-NEXT: movdqa (%rdi), %xmm6 ; SSE2-NEXT: movdqa 16(%rdi), %xmm2 -; SSE2-NEXT: movdqa 32(%rdi), %xmm1 -; SSE2-NEXT: movdqa 48(%rdi), %xmm0 -; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa (%rsi), %xmm5 -; SSE2-NEXT: movdqa 16(%rsi), %xmm13 +; SSE2-NEXT: movdqa 32(%rdi), %xmm8 +; SSE2-NEXT: movdqa 48(%rdi), %xmm13 +; SSE2-NEXT: movdqa (%rsi), %xmm9 +; SSE2-NEXT: movdqa 16(%rsi), %xmm10 ; SSE2-NEXT: movdqa 32(%rsi), %xmm11 -; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: movdqa %xmm6, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15] ; SSE2-NEXT: movdqa %xmm4, %xmm7 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7] -; SSE2-NEXT: punpcklwd 
{{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm1[4],xmm7[5],xmm1[5],xmm7[6],xmm1[6],xmm7[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1],xmm6[2],xmm1[2],xmm6[3],xmm1[3],xmm6[4],xmm1[4],xmm6[5],xmm1[5],xmm6[6],xmm1[6],xmm6[7],xmm1[7] ; SSE2-NEXT: movdqa %xmm6, %xmm12 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm12 = xmm12[4],xmm0[4],xmm12[5],xmm0[5],xmm12[6],xmm0[6],xmm12[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm12 = xmm12[4],xmm1[4],xmm12[5],xmm1[5],xmm12[6],xmm1[6],xmm12[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1],xmm6[2],xmm1[2],xmm6[3],xmm1[3] ; SSE2-NEXT: movdqa %xmm2, %xmm15 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm15 = xmm15[8],xmm0[8],xmm15[9],xmm0[9],xmm15[10],xmm0[10],xmm15[11],xmm0[11],xmm15[12],xmm0[12],xmm15[13],xmm0[13],xmm15[14],xmm0[14],xmm15[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm15 = xmm15[8],xmm1[8],xmm15[9],xmm1[9],xmm15[10],xmm1[10],xmm15[11],xmm1[11],xmm15[12],xmm1[12],xmm15[13],xmm1[13],xmm15[14],xmm1[14],xmm15[15],xmm1[15] ; SSE2-NEXT: movdqa %xmm15, %xmm14 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm14 = xmm14[4],xmm0[4],xmm14[5],xmm0[5],xmm14[6],xmm0[6],xmm14[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm2, %xmm8 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm8 = 
xmm8[4],xmm0[4],xmm8[5],xmm0[5],xmm8[6],xmm0[6],xmm8[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: movdqa %xmm5, %xmm10 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm10 = xmm10[8],xmm0[8],xmm10[9],xmm0[9],xmm10[10],xmm0[10],xmm10[11],xmm0[11],xmm10[12],xmm0[12],xmm10[13],xmm0[13],xmm10[14],xmm0[14],xmm10[15],xmm0[15] -; SSE2-NEXT: movdqa %xmm10, %xmm3 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: paddd %xmm7, %xmm3 -; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm1, %xmm7 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm0[0],xmm10[1],xmm0[1],xmm10[2],xmm0[2],xmm10[3],xmm0[3] -; SSE2-NEXT: paddd %xmm4, %xmm10 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm5, %xmm3 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: paddd %xmm12, %xmm3 -; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3] -; SSE2-NEXT: paddd %xmm6, %xmm5 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm14 = xmm14[4],xmm1[4],xmm14[5],xmm1[5],xmm14[6],xmm1[6],xmm14[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm15 = xmm15[0],xmm1[0],xmm15[1],xmm1[1],xmm15[2],xmm1[2],xmm15[3],xmm1[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = 
xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: movdqa %xmm9, %xmm3 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] +; SSE2-NEXT: movdqa %xmm3, %xmm5 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] +; SSE2-NEXT: paddd %xmm7, %xmm5 ; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa %xmm13, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15] +; SSE2-NEXT: movdqa %xmm8, %xmm7 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm1[8],xmm7[9],xmm1[9],xmm7[10],xmm1[10],xmm7[11],xmm1[11],xmm7[12],xmm1[12],xmm7[13],xmm1[13],xmm7[14],xmm1[14],xmm7[15],xmm1[15] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-NEXT: paddd %xmm4, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm9 = xmm9[0],xmm1[0],xmm9[1],xmm1[1],xmm9[2],xmm1[2],xmm9[3],xmm1[3],xmm9[4],xmm1[4],xmm9[5],xmm1[5],xmm9[6],xmm1[6],xmm9[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm9, %xmm4 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] +; SSE2-NEXT: paddd %xmm12, %xmm4 +; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: punpcklwd {{.*#+}} xmm9 = xmm9[0],xmm1[0],xmm9[1],xmm1[1],xmm9[2],xmm1[2],xmm9[3],xmm1[3] +; SSE2-NEXT: paddd %xmm6, %xmm9 +; SSE2-NEXT: movdqa %xmm9, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm10, %xmm4 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15] ; 
SSE2-NEXT: movdqa %xmm4, %xmm12 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm12 = xmm12[4],xmm0[4],xmm12[5],xmm0[5],xmm12[6],xmm0[6],xmm12[7],xmm0[7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm12 = xmm12[4],xmm1[4],xmm12[5],xmm1[5],xmm12[6],xmm1[6],xmm12[7],xmm1[7] ; SSE2-NEXT: paddd %xmm14, %xmm12 ; SSE2-NEXT: movdqa %xmm7, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm8 = xmm8[0],xmm1[0],xmm8[1],xmm1[1],xmm8[2],xmm1[2],xmm8[3],xmm1[3],xmm8[4],xmm1[4],xmm8[5],xmm1[5],xmm8[6],xmm1[6],xmm8[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] ; SSE2-NEXT: paddd %xmm15, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm13 = xmm13[0],xmm0[0],xmm13[1],xmm0[1],xmm13[2],xmm0[2],xmm13[3],xmm0[3],xmm13[4],xmm0[4],xmm13[5],xmm0[5],xmm13[6],xmm0[6],xmm13[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm13, %xmm15 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm15 = xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] -; SSE2-NEXT: paddd %xmm8, %xmm15 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm13 = xmm13[0],xmm0[0],xmm13[1],xmm0[1],xmm13[2],xmm0[2],xmm13[3],xmm0[3] -; SSE2-NEXT: paddd %xmm2, %xmm13 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm10 = xmm10[0],xmm1[0],xmm10[1],xmm1[1],xmm10[2],xmm1[2],xmm10[3],xmm1[3],xmm10[4],xmm1[4],xmm10[5],xmm1[5],xmm10[6],xmm1[6],xmm10[7],xmm1[7] +; SSE2-NEXT: 
movdqa %xmm10, %xmm15 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm15 = xmm15[4],xmm1[4],xmm15[5],xmm1[5],xmm15[6],xmm1[6],xmm15[7],xmm1[7] +; SSE2-NEXT: paddd %xmm0, %xmm15 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm1[0],xmm10[1],xmm1[1],xmm10[2],xmm1[2],xmm10[3],xmm1[3] +; SSE2-NEXT: paddd %xmm2, %xmm10 +; SSE2-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm11, %xmm6 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15] -; SSE2-NEXT: movdqa %xmm6, %xmm9 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm9 = xmm9[4],xmm0[4],xmm9[5],xmm0[5],xmm9[6],xmm0[6],xmm9[7],xmm0[7] -; SSE2-NEXT: paddd %xmm5, %xmm9 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm1[8],xmm6[9],xmm1[9],xmm6[10],xmm1[10],xmm6[11],xmm1[11],xmm6[12],xmm1[12],xmm6[13],xmm1[13],xmm6[14],xmm1[14],xmm6[15],xmm1[15] +; SSE2-NEXT: movdqa %xmm6, %xmm10 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm10 = xmm10[4],xmm1[4],xmm10[5],xmm1[5],xmm10[6],xmm1[6],xmm10[7],xmm1[7] +; SSE2-NEXT: paddd %xmm5, %xmm10 +; SSE2-NEXT: movdqa %xmm8, %xmm2 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm1[0],xmm8[1],xmm1[1],xmm8[2],xmm1[2],xmm8[3],xmm1[3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1],xmm6[2],xmm1[2],xmm6[3],xmm1[3] ; SSE2-NEXT: paddd %xmm7, %xmm6 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm11 = 
xmm11[0],xmm0[0],xmm11[1],xmm0[1],xmm11[2],xmm0[2],xmm11[3],xmm0[3],xmm11[4],xmm0[4],xmm11[5],xmm0[5],xmm11[6],xmm0[6],xmm11[7],xmm0[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm1[0],xmm11[1],xmm1[1],xmm11[2],xmm1[2],xmm11[3],xmm1[3],xmm11[4],xmm1[4],xmm11[5],xmm1[5],xmm11[6],xmm1[6],xmm11[7],xmm1[7] ; SSE2-NEXT: movdqa %xmm11, %xmm14 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm14 = xmm14[4],xmm0[4],xmm14[5],xmm0[5],xmm14[6],xmm0[6],xmm14[7],xmm0[7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm14 = xmm14[4],xmm1[4],xmm14[5],xmm1[5],xmm14[6],xmm1[6],xmm14[7],xmm1[7] ; SSE2-NEXT: paddd %xmm2, %xmm14 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload -; SSE2-NEXT: movdqa %xmm5, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm11 = xmm11[0],xmm0[0],xmm11[1],xmm0[1],xmm11[2],xmm0[2],xmm11[3],xmm0[3] -; SSE2-NEXT: paddd %xmm1, %xmm11 -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: movdqa %xmm13, %xmm0 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm11 = xmm11[0],xmm1[0],xmm11[1],xmm1[1],xmm11[2],xmm1[2],xmm11[3],xmm1[3] +; SSE2-NEXT: paddd %xmm8, %xmm11 +; SSE2-NEXT: movdqa %xmm0, %xmm5 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] ; SSE2-NEXT: movdqa 48(%rsi), %xmm7 -; SSE2-NEXT: movdqa %xmm7, %xmm3 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15] -; SSE2-NEXT: movdqa %xmm3, %xmm8 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm8 = 
xmm8[4],xmm0[4],xmm8[5],xmm0[5],xmm8[6],xmm0[6],xmm8[7],xmm0[7] -; SSE2-NEXT: paddd %xmm1, %xmm8 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE2-NEXT: paddd %xmm2, %xmm3 -; SSE2-NEXT: movdqa %xmm5, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3],xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm7, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] -; SSE2-NEXT: paddd %xmm1, %xmm5 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3] -; SSE2-NEXT: paddd %xmm2, %xmm7 +; SSE2-NEXT: movdqa %xmm7, %xmm2 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; SSE2-NEXT: movdqa %xmm2, %xmm9 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm9 = xmm9[4],xmm1[4],xmm9[5],xmm1[5],xmm9[6],xmm1[6],xmm9[7],xmm1[7] +; SSE2-NEXT: paddd %xmm5, %xmm9 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: paddd %xmm0, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm13 = 
xmm13[0],xmm1[0],xmm13[1],xmm1[1],xmm13[2],xmm1[2],xmm13[3],xmm1[3],xmm13[4],xmm1[4],xmm13[5],xmm1[5],xmm13[6],xmm1[6],xmm13[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm13, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3],xmm7[4],xmm1[4],xmm7[5],xmm1[5],xmm7[6],xmm1[6],xmm7[7],xmm1[7] +; SSE2-NEXT: movdqa %xmm7, %xmm8 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm8 = xmm8[4],xmm1[4],xmm8[5],xmm1[5],xmm8[6],xmm1[6],xmm8[7],xmm1[7] +; SSE2-NEXT: paddd %xmm0, %xmm8 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm13 = xmm13[0],xmm1[0],xmm13[1],xmm1[1],xmm13[2],xmm1[2],xmm13[3],xmm1[3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3] +; SSE2-NEXT: paddd %xmm13, %xmm7 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload ; SSE2-NEXT: psubd %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: psubd %xmm0, %xmm10 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload -; SSE2-NEXT: psubd %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload -; SSE2-NEXT: psubd %xmm0, %xmm2 +; SSE2-NEXT: psubd %xmm0, %xmm3 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload +; SSE2-NEXT: psubd %xmm0, %xmm13 +; SSE2-NEXT: movdqa %xmm13, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload +; SSE2-NEXT: psubd %xmm0, %xmm5 +; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: psubd %xmm0, %xmm12 ; SSE2-NEXT: psubd %xmm0, %xmm4 ; SSE2-NEXT: psubd %xmm0, %xmm15 -; SSE2-NEXT: psubd %xmm0, %xmm13 -; SSE2-NEXT: psubd %xmm0, %xmm9 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload +; SSE2-NEXT: psubd %xmm0, %xmm5 +; SSE2-NEXT: psubd %xmm0, %xmm10 ; SSE2-NEXT: 
psubd %xmm0, %xmm6 ; SSE2-NEXT: psubd %xmm0, %xmm14 ; SSE2-NEXT: psubd %xmm0, %xmm11 +; SSE2-NEXT: psubd %xmm0, %xmm9 +; SSE2-NEXT: psubd %xmm0, %xmm2 ; SSE2-NEXT: psubd %xmm0, %xmm8 -; SSE2-NEXT: psubd %xmm0, %xmm3 -; SSE2-NEXT: psubd %xmm0, %xmm5 ; SSE2-NEXT: psubd %xmm0, %xmm7 -; SSE2-NEXT: psrld $1, %xmm10 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: psrld $1, %xmm3 ; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm10 -; SSE2-NEXT: packuswb %xmm1, %xmm10 -; SSE2-NEXT: psrld $1, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm13 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE2-NEXT: pand %xmm13, %xmm1 +; SSE2-NEXT: pand %xmm13, %xmm3 +; SSE2-NEXT: packuswb %xmm1, %xmm3 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: psrld $1, %xmm0 ; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload ; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm2 -; SSE2-NEXT: packuswb %xmm1, %xmm2 -; SSE2-NEXT: packuswb %xmm10, %xmm2 -; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm13, %xmm1 +; SSE2-NEXT: pand %xmm13, %xmm0 +; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: packuswb %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrld $1, %xmm4 ; SSE2-NEXT: psrld $1, %xmm12 -; SSE2-NEXT: pand %xmm0, %xmm12 -; SSE2-NEXT: pand %xmm0, %xmm4 +; SSE2-NEXT: pand %xmm13, %xmm12 +; SSE2-NEXT: pand %xmm13, %xmm4 ; SSE2-NEXT: packuswb %xmm12, %xmm4 -; SSE2-NEXT: psrld $1, %xmm13 +; SSE2-NEXT: movdqa %xmm5, %xmm0 +; SSE2-NEXT: psrld $1, %xmm0 ; SSE2-NEXT: psrld $1, %xmm15 -; SSE2-NEXT: pand %xmm0, %xmm15 -; SSE2-NEXT: pand %xmm0, %xmm13 -; SSE2-NEXT: packuswb %xmm15, %xmm13 -; SSE2-NEXT: packuswb %xmm4, %xmm13 +; SSE2-NEXT: pand %xmm13, %xmm15 +; SSE2-NEXT: pand %xmm13, %xmm0 +; SSE2-NEXT: packuswb %xmm15, %xmm0 +; SSE2-NEXT: packuswb %xmm4, %xmm0 ; SSE2-NEXT: 
psrld $1, %xmm6 -; SSE2-NEXT: psrld $1, %xmm9 -; SSE2-NEXT: pand %xmm0, %xmm9 -; SSE2-NEXT: pand %xmm0, %xmm6 -; SSE2-NEXT: packuswb %xmm9, %xmm6 +; SSE2-NEXT: psrld $1, %xmm10 +; SSE2-NEXT: pand %xmm13, %xmm10 +; SSE2-NEXT: pand %xmm13, %xmm6 +; SSE2-NEXT: packuswb %xmm10, %xmm6 ; SSE2-NEXT: psrld $1, %xmm11 ; SSE2-NEXT: psrld $1, %xmm14 -; SSE2-NEXT: pand %xmm0, %xmm14 -; SSE2-NEXT: pand %xmm0, %xmm11 +; SSE2-NEXT: pand %xmm13, %xmm14 +; SSE2-NEXT: pand %xmm13, %xmm11 ; SSE2-NEXT: packuswb %xmm14, %xmm11 ; SSE2-NEXT: packuswb %xmm6, %xmm11 -; SSE2-NEXT: psrld $1, %xmm3 -; SSE2-NEXT: psrld $1, %xmm8 -; SSE2-NEXT: pand %xmm0, %xmm8 -; SSE2-NEXT: pand %xmm0, %xmm3 -; SSE2-NEXT: packuswb %xmm8, %xmm3 +; SSE2-NEXT: psrld $1, %xmm2 +; SSE2-NEXT: psrld $1, %xmm9 +; SSE2-NEXT: pand %xmm13, %xmm9 +; SSE2-NEXT: pand %xmm13, %xmm2 +; SSE2-NEXT: packuswb %xmm9, %xmm2 ; SSE2-NEXT: psrld $1, %xmm7 -; SSE2-NEXT: psrld $1, %xmm5 -; SSE2-NEXT: pand %xmm0, %xmm5 -; SSE2-NEXT: pand %xmm0, %xmm7 -; SSE2-NEXT: packuswb %xmm5, %xmm7 -; SSE2-NEXT: packuswb %xmm3, %xmm7 +; SSE2-NEXT: psrld $1, %xmm8 +; SSE2-NEXT: pand %xmm13, %xmm8 +; SSE2-NEXT: pand %xmm13, %xmm7 +; SSE2-NEXT: packuswb %xmm8, %xmm7 +; SSE2-NEXT: packuswb %xmm2, %xmm7 ; SSE2-NEXT: movdqu %xmm7, (%rax) ; SSE2-NEXT: movdqu %xmm11, (%rax) -; SSE2-NEXT: movdqu %xmm13, (%rax) +; SSE2-NEXT: movdqu %xmm0, (%rax) ; SSE2-NEXT: movdqu %xmm1, (%rax) ; SSE2-NEXT: retq ; @@ -1406,8 +1406,9 @@ ; SSE2-NEXT: punpckhwd {{.*#+}} xmm11 = xmm11[4],xmm0[4],xmm11[5],xmm0[5],xmm11[6],xmm0[6],xmm11[7],xmm0[7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm2, %xmm10 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm10 = xmm10[4],xmm0[4],xmm10[5],xmm0[5],xmm10[6],xmm0[6],xmm10[7],xmm0[7] +; SSE2-NEXT: movdqa %xmm2, 
%xmm3 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE2-NEXT: movdqa %xmm3, %xmm10 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] ; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15] @@ -1449,12 +1450,13 @@ ; SSE2-NEXT: psubd %xmm0, %xmm11 ; SSE2-NEXT: psubd %xmm0, %xmm5 ; SSE2-NEXT: psubd %xmm0, %xmm10 +; SSE2-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: psubd %xmm0, %xmm2 ; SSE2-NEXT: psubd %xmm0, %xmm3 -; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: psubd %xmm0, %xmm4 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload -; SSE2-NEXT: psubd %xmm0, %xmm3 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload +; SSE2-NEXT: psubd %xmm0, %xmm10 +; SSE2-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: psubd %xmm0, %xmm1 ; SSE2-NEXT: psrld $1, %xmm7 ; SSE2-NEXT: psrld $1, %xmm15 @@ -1485,19 +1487,19 @@ ; SSE2-NEXT: pand %xmm0, %xmm5 ; SSE2-NEXT: packuswb %xmm11, %xmm5 ; SSE2-NEXT: psrld $1, %xmm2 -; SSE2-NEXT: psrld $1, %xmm10 -; SSE2-NEXT: pand %xmm0, %xmm10 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload +; SSE2-NEXT: psrld $1, %xmm6 +; SSE2-NEXT: pand %xmm0, %xmm6 ; SSE2-NEXT: pand %xmm0, %xmm2 -; SSE2-NEXT: packuswb %xmm10, %xmm2 +; SSE2-NEXT: packuswb %xmm6, %xmm2 ; SSE2-NEXT: packuswb %xmm5, %xmm2 ; SSE2-NEXT: psrld $1, %xmm4 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload -; SSE2-NEXT: psrld $1, %xmm5 -; SSE2-NEXT: pand %xmm0, %xmm5 +; SSE2-NEXT: psrld $1, %xmm3 +; SSE2-NEXT: pand %xmm0, %xmm3 ; SSE2-NEXT: pand %xmm0, %xmm4 -; SSE2-NEXT: packuswb %xmm5, %xmm4 +; SSE2-NEXT: packuswb %xmm3, %xmm4 ; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: movdqa %xmm3, %xmm5 +; SSE2-NEXT: 
movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload ; SSE2-NEXT: psrld $1, %xmm5 ; SSE2-NEXT: pand %xmm0, %xmm5 ; SSE2-NEXT: pand %xmm0, %xmm1 Index: test/CodeGen/X86/avx-intel-ocl.ll =================================================================== --- test/CodeGen/X86/avx-intel-ocl.ll +++ test/CodeGen/X86/avx-intel-ocl.ll @@ -122,8 +122,8 @@ ; pass parameters in registers for 64-bit platform ; X64-LABEL: test_int -; X64: leal {{.*}}, %edi ; X64: movl {{.*}}, %esi +; X64: leal {{.*}}, %edi ; X64: call ; X64: addl {{.*}}, %eax define i32 @test_int(i32 %a, i32 %b) nounwind { Index: test/CodeGen/X86/avx-vinsertf128.ll =================================================================== --- test/CodeGen/X86/avx-vinsertf128.ll +++ test/CodeGen/X86/avx-vinsertf128.ll @@ -75,8 +75,7 @@ define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: insert_undef_pd: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0) ret <4 x double> %res @@ -86,8 +85,7 @@ define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: insert_undef_ps: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0) ret <8 x float> %res @@ -97,8 +95,7 @@ define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: insert_undef_si: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %XMM1 %XMM1 %YMM1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0) ret <8 x i32> %res Index: test/CodeGen/X86/avx512-arith.ll 
=================================================================== --- test/CodeGen/X86/avx512-arith.ll +++ test/CodeGen/X86/avx512-arith.ll @@ -930,10 +930,10 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, ; CHECK-LABEL: test_mask_broadcast_vaddpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1 -; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} -; CHECK-NEXT: vmovapd %zmm1, %zmm0 +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm2, %k1 +; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} ; CHECK-NEXT: retq double* %j, <8 x i64> %mask1) nounwind { %mask = icmp ne <8 x i64> %mask1, zeroinitializer Index: test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- test/CodeGen/X86/avx512-calling-conv.ll +++ test/CodeGen/X86/avx512-calling-conv.ll @@ -358,9 +358,9 @@ define i32 @test10(i32 %a, i32 %b, i1 %cond) { ; ALL_X64-LABEL: test10: ; ALL_X64: ## BB#0: -; ALL_X64-NEXT: testb $1, %dl -; ALL_X64-NEXT: cmovel %esi, %edi ; ALL_X64-NEXT: movl %edi, %eax +; ALL_X64-NEXT: testb $1, %dl +; ALL_X64-NEXT: cmovel %esi, %eax ; ALL_X64-NEXT: retq ; ; KNL_X32-LABEL: test10: Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -197,25 +197,25 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) { ; KNL-LABEL: test12: ; KNL: ## BB#0: +; KNL-NEXT: movq %rdi, %rax ; KNL-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: testb $1, %al -; KNL-NEXT: cmoveq %rsi, %rdi -; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: testb $1, %cl +; KNL-NEXT: cmoveq %rsi, %rax ; KNL-NEXT: vzeroupper ; 
KNL-NEXT: retq ; ; SKX-LABEL: test12: ; SKX: ## BB#0: +; SKX-NEXT: movq %rdi, %rax ; SKX-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 ; SKX-NEXT: kshiftlb $7, %k0, %k0 ; SKX-NEXT: kshiftrb $7, %k0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: testb $1, %al -; SKX-NEXT: cmoveq %rsi, %rdi -; SKX-NEXT: movq %rdi, %rax +; SKX-NEXT: kmovd %k0, %ecx +; SKX-NEXT: testb $1, %cl +; SKX-NEXT: cmoveq %rsi, %rax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %cmpvector_func.i = icmp slt <16 x i64> %a, %b @@ -263,25 +263,25 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) { ; KNL-LABEL: test14: ; KNL: ## BB#0: +; KNL-NEXT: movq %rdi, %rax ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ; KNL-NEXT: kshiftlw $11, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: testb $1, %al -; KNL-NEXT: cmoveq %rsi, %rdi -; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: testb $1, %cl +; KNL-NEXT: cmoveq %rsi, %rax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test14: ; SKX: ## BB#0: +; SKX-NEXT: movq %rdi, %rax ; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ; SKX-NEXT: kshiftlb $3, %k0, %k0 ; SKX-NEXT: kshiftrb $7, %k0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: testb $1, %al -; SKX-NEXT: cmoveq %rsi, %rdi -; SKX-NEXT: movq %rdi, %rax +; SKX-NEXT: kmovd %k0, %ecx +; SKX-NEXT: testb $1, %cl +; SKX-NEXT: cmoveq %rsi, %rax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %cmpvector_func.i = icmp slt <8 x i64> %a, %b Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -212,10 +212,11 @@ ; CHECK-LABEL: mand16: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorl %esi, %ecx +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: ## kill: 
%AX %AX %EAX ; CHECK-NEXT: retq %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> Index: test/CodeGen/X86/avx512-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -675,10 +675,10 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, ; CHECK-LABEL: test_mask_broadcast_vaddpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1 # sched: [3:1.00] -; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50] -; CHECK-NEXT: vmovapd %zmm1, %zmm0 +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} # sched: [11:0.50] ; CHECK-NEXT: retq # sched: [7:1.00] double* %j, <8 x i64> %mask1) nounwind { %mask = icmp ne <8 x i64> %mask1, zeroinitializer @@ -4770,10 +4770,11 @@ ; CHECK-LABEL: mand16: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edi, %eax # sched: [1:0.25] -; CHECK-NEXT: xorl %esi, %eax # sched: [1:0.25] -; CHECK-NEXT: andl %esi, %edi # sched: [1:0.25] -; CHECK-NEXT: orl %eax, %edi # sched: [1:0.25] -; CHECK-NEXT: movl %edi, %eax # sched: [1:0.25] +; CHECK-NEXT: movl %eax, %ecx # sched: [1:0.25] +; CHECK-NEXT: xorl %esi, %ecx # sched: [1:0.25] +; CHECK-NEXT: andl %esi, %eax # sched: [1:0.25] +; CHECK-NEXT: orl %ecx, %eax # sched: [1:0.25] +; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq # sched: [7:1.00] %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> Index: test/CodeGen/X86/avx512-select.ll =================================================================== --- test/CodeGen/X86/avx512-select.ll +++ test/CodeGen/X86/avx512-select.ll @@ -134,8 +134,9 @@ ; ; X64-LABEL: select05: ; X64: # BB#0: -; X64-NEXT: orl %esi, %edi ; X64-NEXT: movl %edi, %eax +; 
X64-NEXT: orl %esi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> @@ -184,8 +185,9 @@ ; ; X64-LABEL: select06: ; X64: # BB#0: -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> Index: test/CodeGen/X86/avx512bw-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512bw-mask-op.ll +++ test/CodeGen/X86/avx512bw-mask-op.ll @@ -81,10 +81,10 @@ ; CHECK-LABEL: mand32: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andl %esi, %eax -; CHECK-NEXT: xorl %esi, %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: andl %esi, %ecx +; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax ; CHECK-NEXT: retq %ma = bitcast i32 %x to <32 x i1> %mb = bitcast i32 %y to <32 x i1> @@ -118,10 +118,10 @@ ; CHECK-LABEL: mand64: ; CHECK: ## BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: andq %rsi, %rax -; CHECK-NEXT: xorq %rsi, %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: xorq %rsi, %rax +; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: retq %ma = bitcast i64 %x to <64 x i1> %mb = bitcast i64 %y to <64 x i1> Index: test/CodeGen/X86/avx512dq-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512dq-mask-op.ll +++ test/CodeGen/X86/avx512dq-mask-op.ll @@ -34,10 +34,11 @@ ; CHECK-LABEL: mand8: ; CHECK: ## BB#0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorl %esi, %ecx +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: ## 
kill: %AL %AL %EAX ; CHECK-NEXT: retq %ma = bitcast i8 %x to <8 x i1> %mb = bitcast i8 %y to <8 x i1> Index: test/CodeGen/X86/avx512vl-arith.ll =================================================================== --- test/CodeGen/X86/avx512vl-arith.ll +++ test/CodeGen/X86/avx512vl-arith.ll @@ -429,10 +429,10 @@ define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, double* %j, <4 x i64> %mask1) nounwind { ; CHECK-LABEL: test_mask_broadcast_vaddpd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] -; CHECK-NEXT: vpcmpneqq %ymm0, %ymm2, %k1 ## encoding: [0x62,0xf3,0xed,0x28,0x1f,0xc8,0x04] -; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm1, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x0f] -; CHECK-NEXT: vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %ymm1, %ymm2, %k1 ## encoding: [0x62,0xf3,0xed,0x28,0x1f,0xc9,0x04] +; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0x58,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %tmp = load double, double* %j @@ -879,10 +879,10 @@ define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i, double* %j, <2 x i64> %mask1) nounwind { ; CHECK-LABEL: test_mask_broadcast_vaddpd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] -; CHECK-NEXT: vpcmpneqq %xmm0, %xmm2, %k1 ## encoding: [0x62,0xf3,0xed,0x08,0x1f,0xc8,0x04] -; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm1, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x0f] -; CHECK-NEXT: vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x28,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm2, %k1 ## encoding: [0x62,0xf3,0xed,0x08,0x1f,0xc9,0x04] +; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0x58,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %tmp = load double, double* %j Index: test/CodeGen/X86/bigstructret.ll =================================================================== --- test/CodeGen/X86/bigstructret.ll +++ test/CodeGen/X86/bigstructret.ll @@ -8,20 +8,20 @@ define fastcc %0 @ReturnBigStruct() nounwind readnone { ; X86-LABEL: ReturnBigStruct: ; X86: # BB#0: # %entry -; X86-NEXT: movl $24601, 12(%ecx) # imm = 0x6019 -; X86-NEXT: movl $48, 8(%ecx) -; X86-NEXT: movl $24, 4(%ecx) -; X86-NEXT: movl $12, (%ecx) ; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl $24601, 12(%eax) # imm = 0x6019 +; X86-NEXT: movl $48, 8(%eax) +; X86-NEXT: movl $24, 4(%eax) +; X86-NEXT: movl $12, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: ReturnBigStruct: ; X64: # BB#0: # %entry -; X64-NEXT: movabsq $105660490448944, %rax # imm = 0x601900000030 -; X64-NEXT: movq %rax, 8(%rdi) -; X64-NEXT: movabsq $103079215116, %rax # imm = 0x180000000C -; X64-NEXT: movq %rax, (%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $105660490448944, %rcx # imm = 0x601900000030 +; X64-NEXT: movq %rcx, 8(%rax) +; X64-NEXT: movabsq $103079215116, %rcx # imm = 0x180000000C +; X64-NEXT: movq %rcx, (%rax) ; X64-NEXT: retq entry: %0 = insertvalue %0 zeroinitializer, i32 12, 0 @@ -35,18 +35,18 @@ define fastcc %1 @ReturnBigStruct2() nounwind readnone { ; X86-LABEL: ReturnBigStruct2: ; X86: # BB#0: # %entry -; X86-NEXT: movl $48, 4(%ecx) -; X86-NEXT: movb $1, 2(%ecx) -; X86-NEXT: movw $256, (%ecx) # imm = 0x100 ; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl $48, 
4(%eax) +; X86-NEXT: movb $1, 2(%eax) +; X86-NEXT: movw $256, (%eax) # imm = 0x100 ; X86-NEXT: retl ; ; X64-LABEL: ReturnBigStruct2: ; X64: # BB#0: # %entry -; X64-NEXT: movl $48, 4(%rdi) -; X64-NEXT: movb $1, 2(%rdi) -; X64-NEXT: movw $256, (%rdi) # imm = 0x100 ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl $48, 4(%rax) +; X64-NEXT: movb $1, 2(%rax) +; X64-NEXT: movw $256, (%rax) # imm = 0x100 ; X64-NEXT: retq entry: %0 = insertvalue %1 zeroinitializer, i1 false, 0 Index: test/CodeGen/X86/bitcast-i256.ll =================================================================== --- test/CodeGen/X86/bitcast-i256.ll +++ test/CodeGen/X86/bitcast-i256.ll @@ -5,16 +5,16 @@ define i256 @foo(<8 x i32> %a) { ; FAST-LABEL: foo: ; FAST: # BB#0: -; FAST-NEXT: vmovups %ymm0, (%rdi) ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: vmovups %ymm0, (%rax) ; FAST-NEXT: vzeroupper ; FAST-NEXT: retq ; ; SLOW-LABEL: foo: ; SLOW: # BB#0: -; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rdi) -; SLOW-NEXT: vmovups %xmm0, (%rdi) ; SLOW-NEXT: movq %rdi, %rax +; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rax) +; SLOW-NEXT: vmovups %xmm0, (%rax) ; SLOW-NEXT: vzeroupper ; SLOW-NEXT: retq %r = bitcast <8 x i32> %a to i256 Index: test/CodeGen/X86/bitcast-int-to-vector-bool.ll =================================================================== --- test/CodeGen/X86/bitcast-int-to-vector-bool.ll +++ test/CodeGen/X86/bitcast-int-to-vector-bool.ll @@ -200,8 +200,8 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) { ; SSE2-SSSE3-LABEL: bitcast_i32_32i1: ; SSE2-SSSE3: # BB#0: -; SSE2-SSSE3-NEXT: movl %esi, (%rdi) ; SSE2-SSSE3-NEXT: movq %rdi, %rax +; SSE2-SSSE3-NEXT: movl %esi, (%rax) ; SSE2-SSSE3-NEXT: retq ; ; AVX1-LABEL: bitcast_i32_32i1: @@ -257,14 +257,14 @@ define <64 x i1> @bitcast_i64_64i1(i64 %a0) { ; SSE2-SSSE3-LABEL: bitcast_i64_64i1: ; SSE2-SSSE3: # BB#0: -; SSE2-SSSE3-NEXT: movq %rsi, (%rdi) ; SSE2-SSSE3-NEXT: movq %rdi, %rax +; SSE2-SSSE3-NEXT: movq %rsi, (%rax) ; SSE2-SSSE3-NEXT: retq ; ; AVX12-LABEL: 
bitcast_i64_64i1: ; AVX12: # BB#0: -; AVX12-NEXT: movq %rsi, (%rdi) ; AVX12-NEXT: movq %rdi, %rax +; AVX12-NEXT: movq %rsi, (%rax) ; AVX12-NEXT: retq ; ; AVX512-LABEL: bitcast_i64_64i1: Index: test/CodeGen/X86/bitreverse.ll =================================================================== --- test/CodeGen/X86/bitreverse.ll +++ test/CodeGen/X86/bitreverse.ll @@ -341,20 +341,21 @@ ; ; X64-LABEL: test_bitreverse_i8: ; X64: # BB#0: -; X64-NEXT: rolb $4, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $51, %al -; X64-NEXT: shlb $2, %al -; X64-NEXT: andb $-52, %dil -; X64-NEXT: shrb $2, %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $85, %al -; X64-NEXT: addb %al, %al -; X64-NEXT: andb $-86, %dil -; X64-NEXT: shrb %dil -; X64-NEXT: orb %al, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb $4, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $51, %cl +; X64-NEXT: shlb $2, %cl +; X64-NEXT: andb $-52, %al +; X64-NEXT: shrb $2, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $85, %cl +; X64-NEXT: addb %cl, %cl +; X64-NEXT: andb $-86, %al +; X64-NEXT: shrb %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b @@ -384,21 +385,22 @@ ; ; X64-LABEL: test_bitreverse_i4: ; X64: # BB#0: -; X64-NEXT: rolb $4, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $51, %al -; X64-NEXT: shlb $2, %al -; X64-NEXT: andb $-52, %dil -; X64-NEXT: shrb $2, %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $80, %al -; X64-NEXT: addb %al, %al -; X64-NEXT: andb $-96, %dil -; X64-NEXT: shrb %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: shrb $4, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb $4, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $51, %cl +; X64-NEXT: shlb $2, %cl +; X64-NEXT: andb $-52, %al +; X64-NEXT: shrb $2, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $80, %cl +; 
X64-NEXT: addb %cl, %cl +; X64-NEXT: andb $-96, %al +; X64-NEXT: shrb %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: shrb $4, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %b = call i4 @llvm.bitreverse.i4(i4 %a) ret i4 %b @@ -474,6 +476,7 @@ ; X64-LABEL: identity_i8: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) %c = call i8 @llvm.bitreverse.i8(i8 %b) Index: test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll @@ -10,9 +10,9 @@ define i64 @test__andn_u64(i64 %a0, i64 %a1) { ; X64-LABEL: test__andn_u64: ; X64: # BB#0: -; X64-NEXT: xorq $-1, %rdi -; X64-NEXT: andq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorq $-1, %rax +; X64-NEXT: andq %rsi, %rax ; X64-NEXT: retq %xor = xor i64 %a0, -1 %res = and i64 %xor, %a1 @@ -84,9 +84,9 @@ define i64 @test_andn_u64(i64 %a0, i64 %a1) { ; X64-LABEL: test_andn_u64: ; X64: # BB#0: -; X64-NEXT: xorq $-1, %rdi -; X64-NEXT: andq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorq $-1, %rax +; X64-NEXT: andq %rsi, %rax ; X64-NEXT: retq %xor = xor i64 %a0, -1 %res = and i64 %xor, %a1 Index: test/CodeGen/X86/bmi-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/bmi-intrinsics-fast-isel.ll +++ test/CodeGen/X86/bmi-intrinsics-fast-isel.ll @@ -47,9 +47,9 @@ ; ; X64-LABEL: test__andn_u32: ; X64: # BB#0: -; X64-NEXT: xorl $-1, %edi -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl $-1, %eax +; X64-NEXT: andl %esi, %eax ; X64-NEXT: retq %xor = xor i32 %a0, -1 %res = and i32 %xor, %a1 @@ -199,9 +199,9 @@ ; ; X64-LABEL: test_andn_u32: ; X64: # BB#0: -; X64-NEXT: xorl $-1, %edi -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl $-1, %eax +; X64-NEXT: 
andl %esi, %eax ; X64-NEXT: retq %xor = xor i32 %a0, -1 %res = and i32 %xor, %a1 Index: test/CodeGen/X86/bmi.ll =================================================================== --- test/CodeGen/X86/bmi.ll +++ test/CodeGen/X86/bmi.ll @@ -420,9 +420,9 @@ define i32 @non_bextr32(i32 %x) { ; CHECK-LABEL: non_bextr32: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: shrl $2, %edi -; CHECK-NEXT: andl $111, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $2, %eax +; CHECK-NEXT: andl $111, %eax ; CHECK-NEXT: retq entry: %shr = lshr i32 %x, 2 @@ -446,8 +446,9 @@ define i32 @bzhi32b(i32 %x, i8 zeroext %index) { ; BMI1-LABEL: bzhi32b: ; BMI1: # BB#0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: %CL %CL %ECX ; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: decl %eax ; BMI1-NEXT: andl %edi, %eax @@ -468,8 +469,9 @@ define i32 @bzhi32b_load(i32* %w, i8 zeroext %index) { ; BMI1-LABEL: bzhi32b_load: ; BMI1: # BB#0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: %CL %CL %ECX ; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: decl %eax ; BMI1-NEXT: andl (%rdi), %eax @@ -491,8 +493,9 @@ define i32 @bzhi32c(i32 %x, i8 zeroext %index) { ; BMI1-LABEL: bzhi32c: ; BMI1: # BB#0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: %CL %CL %ECX ; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: decl %eax ; BMI1-NEXT: andl %edi, %eax @@ -535,12 +538,12 @@ define i32 @bzhi32e(i32 %a, i32 %b) { ; BMI1-LABEL: bzhi32e: ; BMI1: # BB#0: # %entry +; BMI1-NEXT: movl %edi, %eax ; BMI1-NEXT: movl $32, %ecx ; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shll %cl, %edi +; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: # kill: %CL %CL %ECX -; BMI1-NEXT: shrl %cl, %edi -; BMI1-NEXT: movl %edi, %eax +; BMI1-NEXT: shrl %cl, %eax ; BMI1-NEXT: retq ; ; BMI2-LABEL: bzhi32e: @@ -557,8 +560,9 @@ define i64 @bzhi64b(i64 %x, i8 zeroext %index) { ; 
BMI1-LABEL: bzhi64b: ; BMI1: # BB#0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: %CL %CL %ECX ; BMI1-NEXT: shlq %cl, %rax ; BMI1-NEXT: decq %rax ; BMI1-NEXT: andq %rdi, %rax @@ -626,12 +630,12 @@ define i64 @bzhi64e(i64 %a, i64 %b) { ; BMI1-LABEL: bzhi64e: ; BMI1: # BB#0: # %entry +; BMI1-NEXT: movq %rdi, %rax ; BMI1-NEXT: movl $64, %ecx ; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shlq %cl, %rdi +; BMI1-NEXT: shlq %cl, %rax ; BMI1-NEXT: # kill: %CL %CL %ECX -; BMI1-NEXT: shrq %cl, %rdi -; BMI1-NEXT: movq %rdi, %rax +; BMI1-NEXT: shrq %cl, %rax ; BMI1-NEXT: retq ; ; BMI2-LABEL: bzhi64e: @@ -648,12 +652,12 @@ define i64 @bzhi64f(i64 %a, i32 %b) { ; BMI1-LABEL: bzhi64f: ; BMI1: # BB#0: # %entry +; BMI1-NEXT: movq %rdi, %rax ; BMI1-NEXT: movl $64, %ecx ; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shlq %cl, %rdi +; BMI1-NEXT: shlq %cl, %rax ; BMI1-NEXT: # kill: %CL %CL %ECX -; BMI1-NEXT: shrq %cl, %rdi -; BMI1-NEXT: movq %rdi, %rax +; BMI1-NEXT: shrq %cl, %rax ; BMI1-NEXT: retq ; ; BMI2-LABEL: bzhi64f: @@ -707,8 +711,8 @@ define i64 @bzhi64_small_constant_mask(i64 %x) { ; CHECK-LABEL: bzhi64_small_constant_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-NEXT: retq entry: %and = and i64 %x, 2147483647 Index: test/CodeGen/X86/bool-simplify.ll =================================================================== --- test/CodeGen/X86/bool-simplify.ll +++ test/CodeGen/X86/bool-simplify.ll @@ -4,9 +4,9 @@ define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) { ; CHECK-LABEL: foo: ; CHECK: # BB#0: -; CHECK-NEXT: ptest %xmm0, %xmm0 -; CHECK-NEXT: cmovnel %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ptest %xmm0, %xmm0 +; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: retq %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) %t2 = icmp ne i32 %t1, 0 Index: 
test/CodeGen/X86/bswap-rotate.ll =================================================================== --- test/CodeGen/X86/bswap-rotate.ll +++ test/CodeGen/X86/bswap-rotate.ll @@ -14,8 +14,9 @@ ; ; X64-LABEL: combine_bswap_rotate: ; X64: # BB#0: -; X64-NEXT: rolw $9, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $9, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %1 = call i16 @llvm.bswap.i16(i16 %a0) %2 = shl i16 %1, 1 Index: test/CodeGen/X86/bswap-wide-int.ll =================================================================== --- test/CodeGen/X86/bswap-wide-int.ll +++ test/CodeGen/X86/bswap-wide-int.ll @@ -25,14 +25,14 @@ ; ; X64-LABEL: bswap_i64: ; X64: # BB#0: -; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: bswapq %rax ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i64: ; X64-MOVBE: # BB#0: -; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rdi, %rax +; X64-MOVBE-NEXT: bswapq %rax ; X64-MOVBE-NEXT: retq %1 = call i64 @llvm.bswap.i64(i64 %a0) ret i64 %1 @@ -79,17 +79,17 @@ ; ; X64-LABEL: bswap_i128: ; X64: # BB#0: -; X64-NEXT: bswapq %rsi -; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rdi, %rdx ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i128: ; X64-MOVBE: # BB#0: -; X64-MOVBE-NEXT: bswapq %rsi -; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rsi, %rax +; X64-MOVBE-NEXT: bswapq %rax +; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rdi, %rdx ; X64-MOVBE-NEXT: retq %1 = call i128 @llvm.bswap.i128(i128 %a0) @@ -149,24 +149,24 @@ ; ; X64-LABEL: bswap_i256: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: bswapq %r8 ; X64-NEXT: bswapq %rcx ; X64-NEXT: bswapq %rdx ; X64-NEXT: bswapq %rsi -; X64-NEXT: movq %rsi, 24(%rdi) -; X64-NEXT: movq %rdx, 16(%rdi) -; X64-NEXT: movq %rcx, 8(%rdi) -; X64-NEXT: movq %r8, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, 24(%rax) +; X64-NEXT: movq %rdx, 16(%rax) +; X64-NEXT: movq %rcx, 8(%rax) +; 
X64-NEXT: movq %r8, (%rax) ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i256: ; X64-MOVBE: # BB#0: -; X64-MOVBE-NEXT: movbeq %rsi, 24(%rdi) -; X64-MOVBE-NEXT: movbeq %rdx, 16(%rdi) -; X64-MOVBE-NEXT: movbeq %rcx, 8(%rdi) -; X64-MOVBE-NEXT: movbeq %r8, (%rdi) ; X64-MOVBE-NEXT: movq %rdi, %rax +; X64-MOVBE-NEXT: movbeq %rsi, 24(%rax) +; X64-MOVBE-NEXT: movbeq %rdx, 16(%rax) +; X64-MOVBE-NEXT: movbeq %rcx, 8(%rax) +; X64-MOVBE-NEXT: movbeq %r8, (%rax) ; X64-MOVBE-NEXT: retq %1 = call i256 @llvm.bswap.i256(i256 %a0) ret i256 %1 Index: test/CodeGen/X86/bswap_tree.ll =================================================================== --- test/CodeGen/X86/bswap_tree.ll +++ test/CodeGen/X86/bswap_tree.ll @@ -20,9 +20,9 @@ ; ; CHECK64-LABEL: test1: ; CHECK64: # BB#0: -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: roll $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: roll $16, %eax ; CHECK64-NEXT: retq %byte0 = and i32 %x, 255 ; 0x000000ff %byte1 = and i32 %x, 65280 ; 0x0000ff00 @@ -53,9 +53,9 @@ ; ; CHECK64-LABEL: test2: ; CHECK64: # BB#0: -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: roll $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: roll $16, %eax ; CHECK64-NEXT: retq %byte1 = shl i32 %x, 8 %byte0 = lshr i32 %x, 8 Index: test/CodeGen/X86/bswap_tree2.ll =================================================================== --- test/CodeGen/X86/bswap_tree2.ll +++ test/CodeGen/X86/bswap_tree2.ll @@ -25,16 +25,16 @@ ; CHECK64-LABEL: test1: ; CHECK64: # BB#0: ; CHECK64-NEXT: movl %edi, %eax -; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: orl $-16777216, %ecx # imm = 0xFF000000 -; CHECK64-NEXT: shll $8, %eax -; CHECK64-NEXT: shrl $8, %ecx -; CHECK64-NEXT: orl %eax, %ecx -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi -; CHECK64-NEXT: orl %ecx, %edi -; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: movl %eax, %ecx +; CHECK64-NEXT: andl 
$16711680, %ecx # imm = 0xFF0000 +; CHECK64-NEXT: movl %eax, %edx +; CHECK64-NEXT: orl $-16777216, %edx # imm = 0xFF000000 +; CHECK64-NEXT: shll $8, %ecx +; CHECK64-NEXT: shrl $8, %edx +; CHECK64-NEXT: orl %ecx, %edx +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: shrl $16, %eax +; CHECK64-NEXT: orl %edx, %eax ; CHECK64-NEXT: retq %byte0 = and i32 %x, 255 ; 0x000000ff %byte1 = and i32 %x, 65280 ; 0x0000ff00 Index: test/CodeGen/X86/bt.ll =================================================================== --- test/CodeGen/X86/bt.ll +++ test/CodeGen/X86/bt.ll @@ -1112,16 +1112,16 @@ ; ; X64-LABEL: demanded_i32: ; X64: # BB#0: -; X64-NEXT: movl %edx, %eax -; X64-NEXT: shrl $5, %eax -; X64-NEXT: movl (%rdi,%rax,4), %r8d -; X64-NEXT: movl $1, %edi ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shll %cl, %edi -; X64-NEXT: btl %edx, %r8d +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: shrl $5, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edi +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: btl %ecx, %edi ; X64-NEXT: jae .LBB30_2 ; X64-NEXT: # BB#1: -; X64-NEXT: orl %edi, (%rsi,%rax,4) +; X64-NEXT: orl %edx, (%rsi,%rax,4) ; X64-NEXT: .LBB30_2: ; X64-NEXT: retq %4 = lshr i32 %2, 5 Index: test/CodeGen/X86/bypass-slow-division-64.ll =================================================================== --- test/CodeGen/X86/bypass-slow-division-64.ll +++ test/CodeGen/X86/bypass-slow-division-64.ll @@ -8,17 +8,17 @@ ; CHECK-LABEL: Test_get_quotient: ; CHECK: # BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: # BB#2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: retq @@ 
-30,21 +30,20 @@ ; CHECK-LABEL: Test_get_remainder: ; CHECK: # BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB1_1 ; CHECK-NEXT: # BB#2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: divl %esi -; CHECK-NEXT: # kill: %EDX %EDX %RDX -; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: retq %result = srem i64 %a, %b ret i64 %result @@ -54,18 +53,18 @@ ; CHECK-LABEL: Test_get_quotient_and_remainder: ; CHECK: # BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB2_1 ; CHECK-NEXT: # BB#2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB2_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: %EDX %EDX %RDX ; CHECK-NEXT: # kill: %EAX %EAX %RAX Index: test/CodeGen/X86/cmov-into-branch.ll =================================================================== --- test/CodeGen/X86/cmov-into-branch.ll +++ test/CodeGen/X86/cmov-into-branch.ll @@ -5,9 +5,9 @@ define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y) { ; CHECK-LABEL: test1: ; CHECK: # BB#0: -; CHECK-NEXT: ucomisd (%rdi), %xmm0 -; CHECK-NEXT: cmovbel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: ucomisd (%rdi), %xmm0 +; CHECK-NEXT: cmovbel %edx, %eax ; CHECK-NEXT: retq %load = load double, double* %b, align 8 %cmp = fcmp olt double %load, %a @@ -19,9 +19,9 @@ define i32 
@test2(double %a, double %b, i32 %x, i32 %y) { ; CHECK-LABEL: test2: ; CHECK: # BB#0: -; CHECK-NEXT: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovbel %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; CHECK-NEXT: cmovbel %esi, %eax ; CHECK-NEXT: retq %cmp = fcmp ogt double %a, %b %cond = select i1 %cmp, i32 %x, i32 %y @@ -48,10 +48,10 @@ define i32 @test5(i32 %a, i32* nocapture %b, i32 %x, i32 %y) { ; CHECK-LABEL: test5: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl %edi, (%rsi) -; CHECK-NEXT: cmoval %edi, %ecx -; CHECK-NEXT: cmovael %edx, %ecx ; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: cmpl %edi, (%rsi) +; CHECK-NEXT: cmoval %edi, %eax +; CHECK-NEXT: cmovael %edx, %eax ; CHECK-NEXT: retq %load = load i32, i32* %b, align 4 %cmp = icmp ult i32 %load, %a @@ -83,9 +83,9 @@ define i32 @weighted_select1(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select1: ; CHECK: # BB#0: -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: cmovnel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnel %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0 @@ -96,12 +96,12 @@ define i32 @weighted_select2(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select2: ; CHECK: # BB#0: -; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jne .LBB6_2 ; CHECK-NEXT: # BB#1: # %select.false -; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: .LBB6_2: # %select.end -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !1 @@ -115,14 +115,13 @@ define i32 @weighted_select3(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select3: ; CHECK: # BB#0: -; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: je .LBB7_1 ; CHECK-NEXT: # BB#2: # %select.end -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB7_1: # 
%select.false -; CHECK-NEXT: movl %esi, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !2 @@ -133,9 +132,9 @@ define i32 @unweighted_select(i32 %a, i32 %b) { ; CHECK-LABEL: unweighted_select: ; CHECK: # BB#0: -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: cmovnel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnel %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !3 Index: test/CodeGen/X86/cmov.ll =================================================================== --- test/CodeGen/X86/cmov.ll +++ test/CodeGen/X86/cmov.ll @@ -194,12 +194,13 @@ define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind { ; CHECK-LABEL: test7: ; CHECK: # BB#0: +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: jne .LBB6_2 ; CHECK-NEXT: # BB#1: -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %d = select i1 %c, i8 %a, i8 %b ret i8 %d Index: test/CodeGen/X86/cmovcmov.ll =================================================================== --- test/CodeGen/X86/cmovcmov.ll +++ test/CodeGen/X86/cmovcmov.ll @@ -9,10 +9,10 @@ ; CHECK-LABEL: test_select_fcmp_oeq_i32: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovnel %esi, %edi -; CMOV-NEXT: cmovpl %esi, %edi ; CMOV-NEXT: movl %edi, %eax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovnel %esi, %eax +; CMOV-NEXT: cmovpl %esi, %eax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) @@ -36,10 +36,10 @@ ; CHECK-LABEL: test_select_fcmp_oeq_i64: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovneq %rsi, %rdi -; CMOV-NEXT: cmovpq %rsi, %rdi ; CMOV-NEXT: movq %rdi, %rax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovneq %rsi, %rax +; CMOV-NEXT: cmovpq %rsi, %rax ; CMOV-NEXT: retq ; 
NOCMOV-NEXT: flds 8(%esp) @@ -64,10 +64,10 @@ ; CHECK-LABEL: test_select_fcmp_une_i64: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovneq %rdi, %rsi -; CMOV-NEXT: cmovpq %rdi, %rsi ; CMOV-NEXT: movq %rsi, %rax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovneq %rdi, %rax +; CMOV-NEXT: cmovpq %rdi, %rax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) Index: test/CodeGen/X86/cmp.ll =================================================================== --- test/CodeGen/X86/cmp.ll +++ test/CodeGen/X86/cmp.ll @@ -268,9 +268,9 @@ define i32 @test13(i32 %mask, i32 %base, i32 %intra) { ; CHECK-LABEL: test13: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08] -; CHECK-NEXT: cmovnel %edx, %esi # encoding: [0x0f,0x45,0xf2] ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; CHECK-NEXT: testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08] +; CHECK-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2] ; CHECK-NEXT: retq # encoding: [0xc3] entry: %and = and i32 %mask, 8 @@ -283,9 +283,9 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) { ; CHECK-LABEL: test14: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07] -; CHECK-NEXT: cmovnsl %edx, %esi # encoding: [0x0f,0x49,0xf2] ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07] +; CHECK-NEXT: cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2] ; CHECK-NEXT: retq # encoding: [0xc3] entry: %s = lshr i32 %mask, 7 Index: test/CodeGen/X86/combine-add.ll =================================================================== --- test/CodeGen/X86/combine-add.ll +++ test/CodeGen/X86/combine-add.ll @@ -103,8 +103,8 @@ define <4 x i32> @combine_vec_add_sub_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add0: ; SSE: # BB#0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add0: 
@@ -121,8 +121,8 @@ define <4 x i32> @combine_vec_add_sub_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add1: ; SSE: # BB#0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add1: @@ -139,8 +139,8 @@ define <4 x i32> @combine_vec_add_sub_add2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add2: ; SSE: # BB#0: -; SSE-NEXT: paddd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: paddd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add2: @@ -157,8 +157,8 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add3: ; SSE: # BB#0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add3: @@ -203,9 +203,9 @@ ; ; AVX-LABEL: combine_vec_add_uniquebits: ; AVX: # BB#0: -; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 +; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [61680,61680,61680,61680] ; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 +; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [3855,3855,3855,3855] ; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq Index: test/CodeGen/X86/conditional-indecrement.ll =================================================================== --- test/CodeGen/X86/conditional-indecrement.ll +++ test/CodeGen/X86/conditional-indecrement.ll @@ -4,9 +4,9 @@ define i32 @test1(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test1: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -17,9 +17,9 @@ define i32 @test1_commute(i32 %a, i32 %b) nounwind 
readnone { ; CHECK-LABEL: test1_commute: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -30,9 +30,9 @@ define i32 @test2(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test2: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -43,9 +43,9 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test3: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -56,9 +56,9 @@ define i32 @test4(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test4: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -69,9 +69,9 @@ define i32 @test5(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test5: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -82,9 +82,9 @@ define i32 @test6(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test6: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -95,9 +95,9 @@ define i32 @test7(i32 %a, i32 %b) nounwind 
readnone { ; CHECK-LABEL: test7: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -108,9 +108,9 @@ define i32 @test8(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test8: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 Index: test/CodeGen/X86/divide-by-constant.ll =================================================================== --- test/CodeGen/X86/divide-by-constant.ll +++ test/CodeGen/X86/divide-by-constant.ll @@ -96,8 +96,8 @@ ; X32: # BB#0: ; X32-NEXT: movl $365384439, %eax # imm = 0x15C752F7 ; X32-NEXT: mull {{[0-9]+}}(%esp) -; X32-NEXT: shrl $27, %edx ; X32-NEXT: movl %edx, %eax +; X32-NEXT: shrl $27, %eax ; X32-NEXT: retl ; ; X64-LABEL: test5: @@ -222,9 +222,9 @@ ; ; X64-LABEL: testsize1: ; X64: # BB#0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $32 ; X64-NEXT: popq %rcx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %ecx ; X64-NEXT: retq @@ -245,9 +245,9 @@ ; ; X64-LABEL: testsize2: ; X64: # BB#0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $33 ; X64-NEXT: popq %rcx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %ecx ; X64-NEXT: retq @@ -265,8 +265,8 @@ ; ; X64-LABEL: testsize3: ; X64: # BB#0: # %entry -; X64-NEXT: shrl $5, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl $5, %eax ; X64-NEXT: retq entry: %div = udiv i32 %x, 32 @@ -285,10 +285,10 @@ ; ; X64-LABEL: testsize4: ; X64: # BB#0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $33 ; X64-NEXT: popq %rcx ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: divl %ecx ; X64-NEXT: retq entry: @@ -316,19 +316,18 @@ ; ; X64-LABEL: 
PR23590: ; X64: # BB#0: # %entry -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: movabsq $6120523590596543007, %rdx # imm = 0x54F077C718E7C21F -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdx +; X64-NEXT: movabsq $6120523590596543007, %rcx # imm = 0x54F077C718E7C21F +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx ; X64-NEXT: shrq $12, %rdx ; X64-NEXT: imulq $12345, %rdx, %rax # imm = 0x3039 -; X64-NEXT: subq %rax, %rcx -; X64-NEXT: movabsq $2635249153387078803, %rdx # imm = 0x2492492492492493 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdx -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: shrq %rcx -; X64-NEXT: leaq (%rcx,%rdx), %rax +; X64-NEXT: subq %rax, %rdi +; X64-NEXT: movabsq $2635249153387078803, %rcx # imm = 0x2492492492492493 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: subq %rdx, %rdi +; X64-NEXT: shrq %rdi +; X64-NEXT: leaq (%rdi,%rdx), %rax ; X64-NEXT: shrq $2, %rax ; X64-NEXT: retq entry: Index: test/CodeGen/X86/divrem.ll =================================================================== --- test/CodeGen/X86/divrem.ll +++ test/CodeGen/X86/divrem.ll @@ -101,6 +101,7 @@ ; X64: # BB#0: ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: cwtd ; X64-NEXT: idivw %si ; X64-NEXT: movw %ax, (%r8) @@ -131,6 +132,7 @@ ; X64-LABEL: si8: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %esi # NOREX @@ -182,8 +184,8 @@ ; X64-LABEL: ui64: ; X64: # BB#0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divq %rsi ; X64-NEXT: movq %rax, (%r8) ; X64-NEXT: movq %rdx, (%rcx) @@ -212,8 +214,8 @@ ; X64-LABEL: ui32: ; X64: # BB#0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divl %esi ; X64-NEXT: movl %eax, (%r8) ; X64-NEXT: movl %edx, (%rcx) @@ -242,8 +244,9 @@ ; 
X64-LABEL: ui16: ; X64: # BB#0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: divw %si ; X64-NEXT: movw %ax, (%r8) ; X64-NEXT: movw %dx, (%rcx) Index: test/CodeGen/X86/divrem8_ext.ll =================================================================== --- test/CodeGen/X86/divrem8_ext.ll +++ test/CodeGen/X86/divrem8_ext.ll @@ -113,6 +113,7 @@ ; X64-LABEL: test_sdivrem_sext_ah: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %ecx # NOREX @@ -138,6 +139,7 @@ ; X64-LABEL: test_srem_sext_ah: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax # NOREX @@ -162,6 +164,7 @@ ; X64-LABEL: test_srem_noext_ah: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax # NOREX @@ -187,6 +190,7 @@ ; X64-LABEL: test_srem_sext64_ah: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax # NOREX Index: test/CodeGen/X86/fast-isel-fold-mem.ll =================================================================== --- test/CodeGen/X86/fast-isel-fold-mem.ll +++ test/CodeGen/X86/fast-isel-fold-mem.ll @@ -3,8 +3,8 @@ define i64 @fold_load(i64* %a, i64 %b) { ; CHECK-LABEL: fold_load -; CHECK: addq (%rdi), %rsi -; CHECK-NEXT: movq %rsi, %rax +; CHECK: movq %rsi, %rax +; CHECK-NEXT: addq (%rdi), %rax %1 = load i64, i64* %a, align 8 %2 = add i64 %1, %b ret i64 %2 Index: test/CodeGen/X86/fast-isel-select-cmov.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-cmov.ll +++ test/CodeGen/X86/fast-isel-select-cmov.ll @@ -31,9 +31,9 @@ define i32 @select_cmov_i32(i1 zeroext 
%cond, i32 %a, i32 %b) { ; CHECK-LABEL: select_cmov_i32: ; CHECK: ## BB#0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: cmovel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmovel %edx, %eax ; CHECK-NEXT: retq %1 = select i1 %cond, i32 %a, i32 %b ret i32 %1 @@ -42,9 +42,9 @@ define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) { ; CHECK-LABEL: select_cmp_cmov_i32: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: cmovbl %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: cmovbl %edi, %eax ; CHECK-NEXT: retq %1 = icmp ult i32 %a, %b %2 = select i1 %1, i32 %a, i32 %b @@ -54,9 +54,9 @@ define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) { ; CHECK-LABEL: select_cmov_i64: ; CHECK: ## BB#0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: cmoveq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmoveq %rdx, %rax ; CHECK-NEXT: retq %1 = select i1 %cond, i64 %a, i64 %b ret i64 %1 @@ -65,9 +65,9 @@ define i64 @select_cmp_cmov_i64(i64 %a, i64 %b) { ; CHECK-LABEL: select_cmp_cmov_i64: ; CHECK: ## BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbq %rdi, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: cmovbq %rdi, %rax ; CHECK-NEXT: retq %1 = icmp ult i64 %a, %b %2 = select i1 %1, i64 %a, i64 %b Index: test/CodeGen/X86/fast-isel-select-cmov2.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-cmov2.ll +++ test/CodeGen/X86/fast-isel-select-cmov2.ll @@ -14,14 +14,14 @@ define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_oeq_cmov -; CHECK: ucomisd %xmm1, %xmm0 -; SDAG-NEXT: cmovneq %rsi, %rdi -; SDAG-NEXT: cmovpq %rsi, %rdi -; SDAG-NEXT: movq %rdi, %rax -; FAST-NEXT: setnp %al -; FAST-NEXT: sete %cl -; FAST-NEXT: testb %al, %cl -; FAST-NEXT: cmoveq %rsi, %rdi +; CHECK: movq %rdi, %rax +; CHECK-NEXT: ucomisd %xmm1, 
%xmm0 +; SDAG-NEXT: cmovneq %rsi, %rax +; SDAG-NEXT: cmovpq %rsi, %rax +; FAST-NEXT: setnp %cl +; FAST-NEXT: sete %dl +; FAST-NEXT: testb %cl, %dl +; FAST-NEXT: cmoveq %rsi, %rax %1 = fcmp oeq double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -30,7 +30,7 @@ define i64 @select_fcmp_ogt_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_ogt_cmov ; CHECK: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovbeq %rsi, %rdi +; CHECK-NEXT: cmovbeq %rsi, %rax %1 = fcmp ogt double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -39,7 +39,7 @@ define i64 @select_fcmp_oge_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_oge_cmov ; CHECK: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovbq %rsi, %rdi +; CHECK-NEXT: cmovbq %rsi, %rax %1 = fcmp oge double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -48,7 +48,7 @@ define i64 @select_fcmp_olt_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_olt_cmov ; CHECK: ucomisd %xmm0, %xmm1 -; CHECK-NEXT: cmovbeq %rsi, %rdi +; CHECK-NEXT: cmovbeq %rsi, %rax %1 = fcmp olt double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -57,7 +57,7 @@ define i64 @select_fcmp_ole_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_ole_cmov ; CHECK: ucomisd %xmm0, %xmm1 -; CHECK-NEXT: cmovbq %rsi, %rdi +; CHECK-NEXT: cmovbq %rsi, %rax %1 = fcmp ole double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -66,7 +66,7 @@ define i64 @select_fcmp_one_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_one_cmov ; CHECK: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmoveq %rsi, %rdi +; CHECK-NEXT: cmoveq %rsi, %rax %1 = fcmp one double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -75,7 +75,7 @@ define i64 @select_fcmp_ord_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_ord_cmov ; CHECK: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovpq %rsi, %rdi +; CHECK-NEXT: cmovpq %rsi, %rax %1 = fcmp ord double %a, %b %2 = select 
i1 %1, i64 %c, i64 %d ret i64 %2 @@ -84,7 +84,7 @@ define i64 @select_fcmp_uno_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_uno_cmov ; CHECK: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovnpq %rsi, %rdi +; CHECK-NEXT: cmovnpq %rsi, %rax %1 = fcmp uno double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -93,7 +93,7 @@ define i64 @select_fcmp_ueq_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_ueq_cmov ; CHECK: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovneq %rsi, %rdi +; CHECK-NEXT: cmovneq %rsi, %rax %1 = fcmp ueq double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -102,7 +102,7 @@ define i64 @select_fcmp_ugt_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_ugt_cmov ; CHECK: ucomisd %xmm0, %xmm1 -; CHECK-NEXT: cmovaeq %rsi, %rdi +; CHECK-NEXT: cmovaeq %rsi, %rax %1 = fcmp ugt double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -111,7 +111,7 @@ define i64 @select_fcmp_uge_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_uge_cmov ; CHECK: ucomisd %xmm0, %xmm1 -; CHECK-NEXT: cmovaq %rsi, %rdi +; CHECK-NEXT: cmovaq %rsi, %rax %1 = fcmp uge double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -120,7 +120,7 @@ define i64 @select_fcmp_ult_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_ult_cmov ; CHECK: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovaeq %rsi, %rdi +; CHECK-NEXT: cmovaeq %rsi, %rax %1 = fcmp ult double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -129,7 +129,7 @@ define i64 @select_fcmp_ule_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_ule_cmov ; CHECK: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovaq %rsi, %rdi +; CHECK-NEXT: cmovaq %rsi, %rax %1 = fcmp ule double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -137,14 +137,15 @@ define i64 @select_fcmp_une_cmov(double %a, double %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_fcmp_une_cmov -; CHECK: ucomisd %xmm1, %xmm0 -; 
SDAG-NEXT: cmovneq %rdi, %rsi -; SDAG-NEXT: cmovpq %rdi, %rsi -; SDAG-NEXT: movq %rsi, %rax -; FAST-NEXT: setp %al -; FAST-NEXT: setne %cl -; FAST-NEXT: orb %al, %cl -; FAST-NEXT: cmoveq %rsi, %rdi +; SDAG: movq %rsi, %rax +; FAST: movq %rdi, %rax +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; SDAG-NEXT: cmovneq %rdi, %rax +; SDAG-NEXT: cmovpq %rdi, %rax +; FAST-NEXT: setp %cl +; FAST-NEXT: setne %dl +; FAST-NEXT: orb %cl, %dl +; FAST-NEXT: cmoveq %rsi, %rax %1 = fcmp une double %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -160,9 +161,9 @@ define i64 @select_icmp_eq_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_eq_cmov -; CHECK: cmpq %rsi, %rdi -; CHECK-NEXT: cmovneq %rcx, %rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovneq %rcx, %rax %1 = icmp eq i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -170,9 +171,9 @@ define i64 @select_icmp_ne_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ne_cmov -; CHECK: cmpq %rsi, %rdi -; CHECK-NEXT: cmoveq %rcx, %rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmoveq %rcx, %rax %1 = icmp ne i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -180,9 +181,9 @@ define i64 @select_icmp_ugt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ugt_cmov -; CHECK: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbeq %rcx, %rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovbeq %rcx, %rax %1 = icmp ugt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -191,9 +192,9 @@ define i64 @select_icmp_uge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_uge_cmov -; CHECK: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbq %rcx, %rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovbq %rcx, %rax %1 = icmp uge i64 %a, %b %2 = select i1 %1, i64 %c, 
i64 %d ret i64 %2 @@ -201,9 +202,9 @@ define i64 @select_icmp_ult_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ult_cmov -; CHECK: cmpq %rsi, %rdi -; CHECK-NEXT: cmovaeq %rcx, %rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovaeq %rcx, %rax %1 = icmp ult i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -211,9 +212,9 @@ define i64 @select_icmp_ule_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ule_cmov -; CHECK: cmpq %rsi, %rdi -; CHECK-NEXT: cmovaq %rcx, %rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovaq %rcx, %rax %1 = icmp ule i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -221,9 +222,9 @@ define i64 @select_icmp_sgt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sgt_cmov -; CHECK: cmpq %rsi, %rdi -; CHECK-NEXT: cmovleq %rcx, %rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovleq %rcx, %rax %1 = icmp sgt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -231,9 +232,9 @@ define i64 @select_icmp_sge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sge_cmov -; CHECK: cmpq %rsi, %rdi -; CHECK-NEXT: cmovlq %rcx, %rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovlq %rcx, %rax %1 = icmp sge i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -241,9 +242,9 @@ define i64 @select_icmp_slt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_slt_cmov -; CHECK: cmpq %rsi, %rdi -; CHECK-NEXT: cmovgeq %rcx, %rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovgeq %rcx, %rax %1 = icmp slt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 @@ -251,9 +252,9 @@ define i64 @select_icmp_sle_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: 
select_icmp_sle_cmov -; CHECK: cmpq %rsi, %rdi -; CHECK-NEXT: cmovgq %rcx, %rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovgq %rcx, %rax %1 = icmp sle i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d ret i64 %2 Index: test/CodeGen/X86/fast-isel-sext-zext.ll =================================================================== --- test/CodeGen/X86/fast-isel-sext-zext.ll +++ test/CodeGen/X86/fast-isel-sext-zext.ll @@ -9,15 +9,14 @@ ; X32-NEXT: andb $1, %al ; X32-NEXT: negb %al ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test1: ; X64: ## BB#0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: negb %al +; X64-NEXT: ## kill: %AL %AL %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i8 %x to i1 %u = sext i1 %z to i8 ret i8 %u @@ -32,7 +31,6 @@ ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: ## kill: %AX %AX %EAX ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test2: ; X64: ## BB#0: @@ -41,7 +39,6 @@ ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: ## kill: %AX %AX %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i16 %x to i1 %u = sext i1 %z to i16 ret i16 %u @@ -55,7 +52,6 @@ ; X32-NEXT: negb %al ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test3: ; X64: ## BB#0: @@ -63,7 +59,6 @@ ; X64-NEXT: negb %dil ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = sext i1 %z to i32 ret i32 %u @@ -77,7 +72,6 @@ ; X32-NEXT: negb %al ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test4: ; X64: ## BB#0: @@ -85,7 +79,6 @@ ; X64-NEXT: negb %dil ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = sext i1 %z to i32 ret i32 %u @@ -97,14 +90,13 @@ ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; 
X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test5: ; X64: ## BB#0: -; X64-NEXT: andb $1, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: ## kill: %AL %AL %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i8 %x to i1 %u = zext i1 %z to i8 ret i8 %u @@ -118,7 +110,6 @@ ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: ## kill: %AX %AX %EAX ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test6: ; X64: ## BB#0: @@ -126,7 +117,6 @@ ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: ## kill: %AX %AX %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i16 %x to i1 %u = zext i1 %z to i16 ret i16 %u @@ -139,14 +129,12 @@ ; X32-NEXT: andb $1, %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test7: ; X64: ## BB#0: ; X64-NEXT: andb $1, %dil ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = zext i1 %z to i32 ret i32 %u @@ -159,14 +147,12 @@ ; X32-NEXT: andb $1, %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test8: ; X64: ## BB#0: ; X64-NEXT: andb $1, %dil ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = zext i1 %z to i32 ret i32 %u @@ -178,14 +164,12 @@ ; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: ## kill: %AX %AX %EAX ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test9: ; X64: ## BB#0: ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: ## kill: %AX %AX %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i8 %x to i16 ret i16 %u } @@ -195,13 +179,11 @@ ; X32: ## BB#0: ; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test10: ; X64: ## BB#0: ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i8 %x to i32 ret i32 %u } @@ -213,13 +195,11 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: 
sarl $31, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test11: ; X64: ## BB#0: ; X64-NEXT: movsbq %dil, %rax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i8 %x to i64 ret i64 %u } @@ -230,14 +210,12 @@ ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: ## kill: %AX %AX %EAX ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test12: ; X64: ## BB#0: ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: ## kill: %AX %AX %EAX ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i8 %x to i16 ret i16 %u } @@ -247,13 +225,11 @@ ; X32: ## BB#0: ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test13: ; X64: ## BB#0: ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i8 %x to i32 ret i32 %u } @@ -264,13 +240,11 @@ ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test14: ; X64: ## BB#0: ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i8 %x to i64 ret i64 %u } @@ -280,13 +254,11 @@ ; X32: ## BB#0: ; X32-NEXT: movswl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test15: ; X64: ## BB#0: ; X64-NEXT: movswl %di, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i16 %x to i32 ret i32 %u } @@ -298,13 +270,11 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: sarl $31, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test16: ; X64: ## BB#0: ; X64-NEXT: movswq %di, %rax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i16 %x to i64 ret i64 %u } @@ -314,13 +284,11 @@ ; X32: ## BB#0: ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test17: ; X64: ## BB#0: ; X64-NEXT: movzwl %di, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i16 %x to i32 ret i32 %u } @@ -331,13 +299,11 @@ 
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test18: ; X64: ## BB#0: ; X64-NEXT: movzwl %di, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i16 %x to i64 ret i64 %u } @@ -349,13 +315,11 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: sarl $31, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test19: ; X64: ## BB#0: ; X64-NEXT: movslq %edi, %rax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i32 %x to i64 ret i64 %u } @@ -366,13 +330,11 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test20: ; X64: ## BB#0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i32 %x to i64 ret i64 %u } Index: test/CodeGen/X86/fast-isel-store.ll =================================================================== --- test/CodeGen/X86/fast-isel-store.ll +++ test/CodeGen/X86/fast-isel-store.ll @@ -11,8 +11,8 @@ define i32 @test_store_32(i32* nocapture %addr, i32 %value) { ; ALL32-LABEL: test_store_32: ; ALL32: # BB#0: # %entry -; ALL32-NEXT: movl %esi, (%rdi) ; ALL32-NEXT: movl %esi, %eax +; ALL32-NEXT: movl %eax, (%rdi) ; ALL32-NEXT: retq ; ; ALL64-LABEL: test_store_32: @@ -29,8 +29,9 @@ define i16 @test_store_16(i16* nocapture %addr, i16 %value) { ; ALL32-LABEL: test_store_16: ; ALL32: # BB#0: # %entry -; ALL32-NEXT: movw %si, (%rdi) ; ALL32-NEXT: movl %esi, %eax +; ALL32-NEXT: movw %ax, (%rdi) +; ALL32-NEXT: # kill: %AX %AX %EAX ; ALL32-NEXT: retq ; ; ALL64-LABEL: test_store_16: @@ -58,11 +59,11 @@ ; SSE64-NEXT: movdqu %xmm0, (%eax) ; SSE64-NEXT: retl ; -; AVXONLY32-LABEL: test_store_4xi32: -; AVXONLY32: # BB#0: -; AVXONLY32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVXONLY32-NEXT: vmovdqu %xmm0, (%rdi) -; AVXONLY32-NEXT: retq +; AVX32-LABEL: test_store_4xi32: +; AVX32: # BB#0: +; AVX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX32-NEXT: 
vmovdqu %xmm0, (%rdi) +; AVX32-NEXT: retq ; ; AVX64-LABEL: test_store_4xi32: ; AVX64: # BB#0: @@ -70,18 +71,6 @@ ; AVX64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX64-NEXT: vmovdqu %xmm0, (%eax) ; AVX64-NEXT: retl -; -; KNL32-LABEL: test_store_4xi32: -; KNL32: # BB#0: -; KNL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; KNL32-NEXT: vmovdqu %xmm0, (%rdi) -; KNL32-NEXT: retq -; -; SKX32-LABEL: test_store_4xi32: -; SKX32: # BB#0: -; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX32-NEXT: vmovdqu %xmm0, (%rdi) -; SKX32-NEXT: retq %foo = add <4 x i32> %value, %value2 ; to force integer type on store store <4 x i32> %foo, <4 x i32>* %addr, align 1 ret <4 x i32> %foo @@ -101,11 +90,11 @@ ; SSE64-NEXT: movdqa %xmm0, (%eax) ; SSE64-NEXT: retl ; -; AVXONLY32-LABEL: test_store_4xi32_aligned: -; AVXONLY32: # BB#0: -; AVXONLY32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVXONLY32-NEXT: vmovdqa %xmm0, (%rdi) -; AVXONLY32-NEXT: retq +; AVX32-LABEL: test_store_4xi32_aligned: +; AVX32: # BB#0: +; AVX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX32-NEXT: vmovdqa %xmm0, (%rdi) +; AVX32-NEXT: retq ; ; AVX64-LABEL: test_store_4xi32_aligned: ; AVX64: # BB#0: @@ -113,18 +102,6 @@ ; AVX64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX64-NEXT: vmovdqa %xmm0, (%eax) ; AVX64-NEXT: retl -; -; KNL32-LABEL: test_store_4xi32_aligned: -; KNL32: # BB#0: -; KNL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; KNL32-NEXT: vmovdqa %xmm0, (%rdi) -; KNL32-NEXT: retq -; -; SKX32-LABEL: test_store_4xi32_aligned: -; SKX32: # BB#0: -; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX32-NEXT: vmovdqa %xmm0, (%rdi) -; SKX32-NEXT: retq %foo = add <4 x i32> %value, %value2 ; to force integer type on store store <4 x i32> %foo, <4 x i32>* %addr, align 16 ret <4 x i32> %foo Index: test/CodeGen/X86/fixup-bw-copy.ll =================================================================== --- test/CodeGen/X86/fixup-bw-copy.ll +++ test/CodeGen/X86/fixup-bw-copy.ll @@ -7,15 +7,11 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" define i8 
@test_movb(i8 %a0) { -; BWON64-LABEL: test_movb: -; BWON64: # BB#0: -; BWON64-NEXT: movl %edi, %eax -; BWON64-NEXT: retq -; -; BWOFF64-LABEL: test_movb: -; BWOFF64: # BB#0: -; BWOFF64-NEXT: movb %dil, %al -; BWOFF64-NEXT: retq +; X64-LABEL: test_movb: +; X64: # BB#0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AL %AL %EAX +; X64-NEXT: retq ; ; X32-LABEL: test_movb: ; X32: # BB#0: @@ -25,15 +21,11 @@ } define i16 @test_movw(i16 %a0) { -; BWON64-LABEL: test_movw: -; BWON64: # BB#0: -; BWON64-NEXT: movl %edi, %eax -; BWON64-NEXT: retq -; -; BWOFF64-LABEL: test_movw: -; BWOFF64: # BB#0: -; BWOFF64-NEXT: movw %di, %ax -; BWOFF64-NEXT: retq +; X64-LABEL: test_movw: +; X64: # BB#0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AX %AX %EAX +; X64-NEXT: retq ; ; BWON32-LABEL: test_movw: ; BWON32: # BB#0: Index: test/CodeGen/X86/fold-vector-sext-crash2.ll =================================================================== --- test/CodeGen/X86/fold-vector-sext-crash2.ll +++ test/CodeGen/X86/fold-vector-sext-crash2.ll @@ -28,14 +28,14 @@ ; ; X64-LABEL: test_sext1: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $-1, 56(%rdi) -; X64-NEXT: movq $-1, 48(%rdi) -; X64-NEXT: movq $-1, 40(%rdi) -; X64-NEXT: movq $-99, 32(%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $-1, 56(%rax) +; X64-NEXT: movq $-1, 48(%rax) +; X64-NEXT: movq $-1, 40(%rax) +; X64-NEXT: movq $-99, 32(%rax) ; X64-NEXT: retq %Se = sext <2 x i8> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -66,14 +66,14 @@ ; ; X64-LABEL: test_sext2: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $-1, 56(%rdi) -; X64-NEXT: movq $-1, 48(%rdi) -; X64-NEXT: movq $-1, 40(%rdi) -; X64-NEXT: movq $-1999, 
32(%rdi) # imm = 0xF831 ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $-1, 56(%rax) +; X64-NEXT: movq $-1, 48(%rax) +; X64-NEXT: movq $-1, 40(%rax) +; X64-NEXT: movq $-1999, 32(%rax) # imm = 0xF831 ; X64-NEXT: retq %Se = sext <2 x i128> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -104,13 +104,13 @@ ; ; X64-LABEL: test_zext1: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $0, 40(%rdi) -; X64-NEXT: movq $254, 32(%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 48(%rax) +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $0, 40(%rax) +; X64-NEXT: movq $254, 32(%rax) ; X64-NEXT: retq %Se = zext <2 x i8> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -141,13 +141,13 @@ ; ; X64-LABEL: test_zext2: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $-1, 40(%rdi) -; X64-NEXT: movq $-2, 32(%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 48(%rax) +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $-1, 40(%rax) +; X64-NEXT: movq $-2, 32(%rax) ; X64-NEXT: retq %Se = zext <2 x i128> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> Index: test/CodeGen/X86/ghc-cc64.ll =================================================================== --- test/CodeGen/X86/ghc-cc64.ll +++ test/CodeGen/X86/ghc-cc64.ll @@ -22,8 +22,8 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: - ; CHECK: movq %rdi, %r13 - ; CHECK-NEXT: movq %rsi, %rbp + ; CHECK: movq %rsi, 
%rbp + ; CHECK-NEXT: movq %rdi, %r13 ; CHECK-NEXT: callq addtwo %0 = call ghccc i64 @addtwo(i64 %a, i64 %b) ; CHECK: callq foo Index: test/CodeGen/X86/hipe-cc64.ll =================================================================== --- test/CodeGen/X86/hipe-cc64.ll +++ test/CodeGen/X86/hipe-cc64.ll @@ -4,11 +4,10 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: - ; CHECK: movq %rsi, %rax + ; CHECK: movq %rsi, %rdx ; CHECK-NEXT: movl $8, %ecx ; CHECK-NEXT: movl $9, %r8d ; CHECK-NEXT: movq %rdi, %rsi - ; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: callq addfour %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) %res = extractvalue {i64, i64, i64} %0, 2 Index: test/CodeGen/X86/iabs.ll =================================================================== --- test/CodeGen/X86/iabs.ll +++ test/CodeGen/X86/iabs.ll @@ -22,10 +22,11 @@ ; X64-LABEL: test_i8: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: sarb $7, %al -; X64-NEXT: addb %al, %dil -; X64-NEXT: xorb %al, %dil -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: sarb $7, %cl +; X64-NEXT: addb %cl, %al +; X64-NEXT: xorb %cl, %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq %tmp1neg = sub i8 0, %a %b = icmp sgt i8 %a, -1 Index: test/CodeGen/X86/imul.ll =================================================================== --- test/CodeGen/X86/imul.ll +++ test/CodeGen/X86/imul.ll @@ -158,16 +158,16 @@ define i32 @mul4294967295_32(i32 %A) { ; X64-LABEL: mul4294967295_32: -; X64: negl %edi -; X64-NEXT: movl %edi, %eax +; X64: movl %edi, %eax +; X64-NEXT: negl %eax %mul = mul i32 %A, 4294967295 ret i32 %mul } define i64 @mul18446744073709551615_64(i64 %A) { ; X64-LABEL: mul18446744073709551615_64: -; X64: negq %rdi -; X64-NEXT: movq %rdi, %rax +; X64: movq %rdi, %rax +; X64-NEXT: negq %rax %mul = mul i64 %A, 18446744073709551615 ret i64 %mul } Index: test/CodeGen/X86/ipra-local-linkage.ll 
=================================================================== --- test/CodeGen/X86/ipra-local-linkage.ll +++ test/CodeGen/X86/ipra-local-linkage.ll @@ -24,7 +24,7 @@ call void @foo() ; CHECK-LABEL: bar: ; CHECK: callq foo - ; CHECK-NEXT: movl %eax, %r15d + ; CHECK-NEXT: movl %edi, %r15d call void asm sideeffect "movl $0, %r12d", "{r15}~{r12}"(i32 %X) ret void } Index: test/CodeGen/X86/legalize-shift-64.ll =================================================================== --- test/CodeGen/X86/legalize-shift-64.ll +++ test/CodeGen/X86/legalize-shift-64.ll @@ -88,6 +88,8 @@ ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -101,12 +103,11 @@ ; CHECK-NEXT: movl %edi, %esi ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl %edx, %ebx -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movb %ch, %cl ; CHECK-NEXT: shll %cl, %ebx ; CHECK-NEXT: shldl %cl, %edx, %ebp -; CHECK-NEXT: testb $32, %cl +; CHECK-NEXT: testb $32, %ch ; CHECK-NEXT: je .LBB4_4 ; CHECK-NEXT: # BB#3: ; CHECK-NEXT: movl %ebx, %ebp Index: test/CodeGen/X86/legalize-shl-vec.ll =================================================================== --- test/CodeGen/X86/legalize-shl-vec.ll +++ test/CodeGen/X86/legalize-shl-vec.ll @@ -26,12 +26,12 @@ ; ; X64-LABEL: test_shl: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 32(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 48(%rax) +; X64-NEXT: movaps %xmm0, 32(%rax) +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) ; 
X64-NEXT: retq %Amt = insertelement <2 x i256> undef, i256 -1, i32 0 %Out = shl <2 x i256> %In, %Amt @@ -62,12 +62,12 @@ ; ; X64-LABEL: test_srl: ; X64: # BB#0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 32(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 48(%rax) +; X64-NEXT: movaps %xmm0, 32(%rax) +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) ; X64-NEXT: retq %Amt = insertelement <2 x i256> undef, i256 -1, i32 0 %Out = lshr <2 x i256> %In, %Amt @@ -108,19 +108,19 @@ ; ; X64-LABEL: test_sra: ; X64: # BB#0: -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; X64-NEXT: sarq $63, %r8 -; X64-NEXT: movq %rdx, 56(%rdi) -; X64-NEXT: movq %rcx, 48(%rdi) -; X64-NEXT: movq %rax, 40(%rdi) -; X64-NEXT: movq %r9, 32(%rdi) -; X64-NEXT: movq %r8, 24(%rdi) -; X64-NEXT: movq %r8, 16(%rdi) -; X64-NEXT: movq %r8, 8(%rdi) -; X64-NEXT: movq %r8, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, 56(%rax) +; X64-NEXT: movq %rdx, 48(%rax) +; X64-NEXT: movq %rcx, 40(%rax) +; X64-NEXT: movq %r9, 32(%rax) +; X64-NEXT: movq %r8, 24(%rax) +; X64-NEXT: movq %r8, 16(%rax) +; X64-NEXT: movq %r8, 8(%rax) +; X64-NEXT: movq %r8, (%rax) ; X64-NEXT: retq %Amt = insertelement <2 x i256> undef, i256 -1, i32 0 %Out = ashr <2 x i256> %In, %Amt Index: test/CodeGen/X86/machine-combiner-int.ll =================================================================== --- test/CodeGen/X86/machine-combiner-int.ll +++ test/CodeGen/X86/machine-combiner-int.ll @@ -62,10 +62,11 @@ define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ands_i8: ; CHECK: # BB#0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: andb %cl, %dl -; CHECK-NEXT: andb %dil, %dl 
; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: andb %dil, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = and i8 %x2, %t0 @@ -78,10 +79,10 @@ define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ands_i32: ; CHECK: # BB#0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: andl %ecx, %edx -; CHECK-NEXT: andl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: andl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = and i32 %x2, %t0 @@ -92,10 +93,10 @@ define i64 @reassociate_ands_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ands_i64: ; CHECK: # BB#0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: andq %rcx, %rdx -; CHECK-NEXT: andq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: andq %rcx, %rax +; CHECK-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = and i64 %x2, %t0 @@ -109,10 +110,11 @@ define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ors_i8: ; CHECK: # BB#0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: orb %cl, %dl -; CHECK-NEXT: orb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: orb %dil, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = or i8 %x2, %t0 @@ -125,10 +127,10 @@ define i32 @reassociate_ors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ors_i32: ; CHECK: # BB#0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: orl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: orl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = or i32 %x2, %t0 @@ -139,10 +141,10 @@ define i64 @reassociate_ors_i64(i64 %x0, 
i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ors_i64: ; CHECK: # BB#0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: orq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: orq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = or i64 %x2, %t0 @@ -156,10 +158,11 @@ define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_xors_i8: ; CHECK: # BB#0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: xorb %cl, %dl -; CHECK-NEXT: xorb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: xorb %cl, %al +; CHECK-NEXT: xorb %dil, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = xor i8 %x2, %t0 @@ -172,10 +175,10 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_xors_i32: ; CHECK: # BB#0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: xorl %ecx, %edx -; CHECK-NEXT: xorl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: xorl %ecx, %eax +; CHECK-NEXT: xorl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = xor i32 %x2, %t0 @@ -186,10 +189,10 @@ define i64 @reassociate_xors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_xors_i64: ; CHECK: # BB#0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: xorq %rcx, %rdx -; CHECK-NEXT: xorq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: xorq %rcx, %rax +; CHECK-NEXT: xorq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = xor i64 %x2, %t0 Index: test/CodeGen/X86/machine-cse.ll =================================================================== --- test/CodeGen/X86/machine-cse.ll +++ test/CodeGen/X86/machine-cse.ll @@ -133,24 +133,24 @@ define i8* @bsd_memchr(i8* %s, i32 %a, i32 %c, i64 %n) nounwind ssp { ; CHECK-LABEL: bsd_memchr: ; CHECK: # BB#0: # %entry +; 
CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: testq %rcx, %rcx ; CHECK-NEXT: je .LBB3_4 ; CHECK-NEXT: # BB#1: # %preheader -; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: movzbl %dl, %edx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB3_2: # %do.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpl %eax, %esi +; CHECK-NEXT: cmpl %edx, %esi ; CHECK-NEXT: je .LBB3_5 ; CHECK-NEXT: # BB#3: # %do.cond ; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: incq %rdi +; CHECK-NEXT: incq %rax ; CHECK-NEXT: decq %rcx ; CHECK-NEXT: jne .LBB3_2 ; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: .LBB3_5: # %return -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq entry: %cmp = icmp eq i64 %n, 0 Index: test/CodeGen/X86/mask-negated-bool.ll =================================================================== --- test/CodeGen/X86/mask-negated-bool.ll +++ test/CodeGen/X86/mask-negated-bool.ll @@ -4,8 +4,8 @@ define i32 @mask_negated_zext_bool1(i1 %x) { ; CHECK-LABEL: mask_negated_zext_bool1: ; CHECK: # BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %ext = zext i1 %x to i32 %neg = sub i32 0, %ext @@ -38,8 +38,8 @@ define i32 @mask_negated_sext_bool1(i1 %x) { ; CHECK-LABEL: mask_negated_sext_bool1: ; CHECK: # BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %ext = sext i1 %x to i32 %neg = sub i32 0, %ext Index: test/CodeGen/X86/misched-matmul.ll =================================================================== --- test/CodeGen/X86/misched-matmul.ll +++ test/CodeGen/X86/misched-matmul.ll @@ -10,7 +10,7 @@ ; more complex cases. 
; ; CHECK: @wrap_mul4 -; CHECK: 23 regalloc - Number of spills inserted +; CHECK: 25 regalloc - Number of spills inserted define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 { entry: Index: test/CodeGen/X86/mul-constant-i16.ll =================================================================== --- test/CodeGen/X86/mul-constant-i16.ll +++ test/CodeGen/X86/mul-constant-i16.ll @@ -11,6 +11,7 @@ ; X64-LABEL: test_mul_by_1: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 1 ret i16 %mul @@ -296,8 +297,9 @@ ; ; X64-LABEL: test_mul_by_16: ; X64: # BB#0: -; X64-NEXT: shll $4, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $4, %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 16 ret i16 %mul @@ -633,8 +635,9 @@ ; ; X64-LABEL: test_mul_by_32: ; X64: # BB#0: -; X64-NEXT: shll $5, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 32 ret i16 %mul Index: test/CodeGen/X86/mul-constant-i32.ll =================================================================== --- test/CodeGen/X86/mul-constant-i32.ll +++ test/CodeGen/X86/mul-constant-i32.ll @@ -780,14 +780,14 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # BB#0: -; X64-JAG-NEXT: shll $4, %edi # sched: [1:0.50] ; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_16: @@ -798,26 +798,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: 
[1:0.25] +; HSW-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # BB#0: -; JAG-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_16: ; X64-SLM: # BB#0: -; X64-SLM-NEXT: shll $4, %edi # sched: [1:1.00] ; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_16: ; SLM-NOOPT: # BB#0: -; SLM-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shll $4, %eax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 16 ret i32 %mul @@ -1626,14 +1626,14 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # BB#0: -; X64-JAG-NEXT: shll $5, %edi # sched: [1:0.50] ; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_32: @@ -1644,26 +1644,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # BB#0: -; JAG-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_32: ; 
X64-SLM: # BB#0: -; X64-SLM-NEXT: shll $5, %edi # sched: [1:1.00] ; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_32: ; SLM-NOOPT: # BB#0: -; SLM-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shll $5, %eax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 32 ret i32 %mul Index: test/CodeGen/X86/mul-constant-i64.ll =================================================================== --- test/CodeGen/X86/mul-constant-i64.ll +++ test/CodeGen/X86/mul-constant-i64.ll @@ -802,14 +802,14 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # BB#0: -; X64-JAG-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_16: @@ -822,26 +822,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # BB#0: -; JAG-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_16: ; X64-SLM: # BB#0: -; X64-SLM-NEXT: shlq $4, %rdi # sched: [1:1.00] ; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] 
; ; SLM-NOOPT-LABEL: test_mul_by_16: ; SLM-NOOPT: # BB#0: -; SLM-NOOPT-NEXT: shlq $4, %rdi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shlq $4, %rax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 16 ret i64 %mul @@ -1707,14 +1707,14 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # BB#0: -; X64-JAG-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_32: @@ -1727,26 +1727,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # BB#0: -; JAG-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_32: ; X64-SLM: # BB#0: -; X64-SLM-NEXT: shlq $5, %rdi # sched: [1:1.00] ; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_32: ; SLM-NOOPT: # BB#0: -; SLM-NOOPT-NEXT: shlq $5, %rdi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shlq $5, %rax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 32 ret i64 %mul Index: test/CodeGen/X86/mul-i1024.ll 
=================================================================== --- test/CodeGen/X86/mul-i1024.ll +++ test/CodeGen/X86/mul-i1024.ll @@ -237,8 +237,8 @@ ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax @@ -247,8 +247,8 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -262,11 +262,11 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: pushl %edi +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl %esi -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -274,34 +274,34 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: pushl %edi +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl 
%eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl $0 -; X32-NEXT: pushl $0 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $0 +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %esi +; X32-NEXT: pushl %edi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -424,9 +424,9 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -435,8 +435,8 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -448,34 +448,34 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %ebx -; X32-NEXT: movl 
{{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: pushl %esi +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -499,10 +499,10 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -513,8 +513,8 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -526,10 +526,10 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: pushl 
%ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -540,8 +540,8 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -674,10 +674,10 @@ ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -689,8 +689,8 @@ ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -847,32 +847,32 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: pushl %ebx +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %ebx +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: 
calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: movl %esi, %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -889,8 +889,8 @@ ; X32-NEXT: pushl %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -924,9 +924,9 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl %esi @@ -937,8 +937,8 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %edi +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -991,7 +991,8 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1002,10 +1003,10 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi -; X32-NEXT: 
movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1016,14 +1017,14 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -1031,12 +1032,11 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -1047,8 +1047,8 @@ ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 @@ -1056,13 +1056,13 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %edi +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: calll 
__multi3 @@ -1070,8 +1070,8 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload @@ -1110,10 +1110,10 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1124,8 +1124,8 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload @@ -1204,8 +1204,7 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 @@ -1217,10 +1216,10 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1231,60 +1230,61 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: movl 
{{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: pushl %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %edi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: pushl %edi +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: 
pushl $0 @@ -1312,6 +1312,7 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl %edi +; X32-NEXT: movl %edi, %ebx ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 @@ -1341,32 +1342,32 @@ ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl %edi, %ebx -; X32-NEXT: pushl %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi +; X32-NEXT: movl %ebx, %esi ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 @@ -1380,10 +1381,10 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 @@ -1407,7 +1408,7 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %ebx +; 
X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 @@ -1419,18 +1420,18 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %edi ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1441,8 +1442,8 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload @@ -1470,7 +1471,6 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: movl %esi, %edi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1481,8 +1481,8 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 @@ -1495,39 +1495,40 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: pushl 
%ebx +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi -; X32-NEXT: pushl %edi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %ebx +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -1540,8 +1541,8 @@ ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -1552,9 +1553,9 @@ ; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -1567,8 +1568,8 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %edi ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %eax ; X32-NEXT: 
calll __multi3 ; X32-NEXT: addl $32, %esp @@ -1632,10 +1633,10 @@ ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -1646,9 +1647,9 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -1657,8 +1658,8 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax @@ -1671,8 +1672,8 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %edi +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -1691,10 +1692,10 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1705,35 +1706,37 @@ ; X32-NEXT: 
leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: pushl %ecx +; X32-NEXT: movl %ecx, %ebx +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1744,10 +1747,10 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1758,8 +1761,8 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %edi +; 
X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload @@ -1799,9 +1802,9 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: pushl %esi +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl %ebx @@ -1812,35 +1815,36 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl %esi +; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %edi +; X32-NEXT: movl %edi, %esi +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1851,8 +1855,8 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: 
pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi ; X32-NEXT: pushl $0 @@ -1865,12 +1869,12 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: pushl %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax @@ -1879,12 +1883,12 @@ ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi +; X32-NEXT: pushl %edi ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 @@ -1893,7 +1897,7 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload -; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload @@ -1957,10 +1961,10 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: pushl %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: pushl %esi ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: pushl 
%eax @@ -1973,8 +1977,8 @@ ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $0 -; X32-NEXT: pushl %esi ; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax ; X32-NEXT: calll __multi3 ; X32-NEXT: addl $32, %esp @@ -2106,11 +2110,11 @@ ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: adcl %esi, %edi ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload @@ -2119,8 +2123,8 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill @@ -2139,8 +2143,8 @@ ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl 
{{[0-9]+}}(%esp), %eax @@ -3215,24 +3219,22 @@ ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: addl %edx, %ebx -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload -; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: adcl %eax, %esi +; X32-NEXT: adcl %ecx, %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill @@ -3243,20 +3245,20 @@ ; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edx, %eax ; X32-NEXT: adcl 
$0, %eax +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax @@ -3519,30 +3521,31 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %edx, %eax +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %edi, %edx -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; 
X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: addl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, %edx +; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill @@ -3853,27 +3856,27 @@ ; X32-NEXT: setb %al ; X32-NEXT: addl {{[0-9]+}}(%esp), %edx ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movzbl %al, %eax -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movzbl %al, %edi +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: adcl $0, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), 
%eax # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: addl %edx, %eax +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %edx, %ebx +; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %edi, %edx -; X32-NEXT: adcl %ebx, %edx +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %ecx, %edx +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill @@ -3885,13 +3888,13 @@ ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload +; 
X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload @@ -4275,17 +4278,16 @@ ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r11, %r12 -; X64-NEXT: movq %r11, %r8 -; X64-NEXT: addq %rax, %r12 -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: movq %r9, (%rsp) # 8-byte Spill -; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %rbp, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rbx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %rax, %rcx +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: addq %rbp, %rcx +; X64-NEXT: movq %rcx, %r12 +; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rbx, %r14 +; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq (%rsi), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: xorl %ebp, %ebp @@ -4295,38 +4297,37 @@ ; X64-NEXT: movq 8(%rsi), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %rbp -; X64-NEXT: xorl %r11d, %r11d +; X64-NEXT: xorl %r9d, %r9d ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %rcx, %r15 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %rdi, %r15 ; X64-NEXT: adcq %rcx, %rbp -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%rsi), %rax ; X64-NEXT: movq %rsi, %r13 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 
8-byte Spill ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdi, %r14 -; X64-NEXT: addq %rax, %r14 -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: addq %rbp, %r14 -; X64-NEXT: adcq %rbx, %r11 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, %r9 +; X64-NEXT: addq %rax, %r9 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: adcq %rdx, %rax +; X64-NEXT: addq %rbp, %r9 +; X64-NEXT: adcq %rbx, %rax +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movq %r9, %rax -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rcx, %r8 +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq (%r10), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: xorl %r8d, %r8d @@ -4334,44 +4335,44 @@ ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rcx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq 32(%r13), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %r8 -; X64-NEXT: xorl %r8d, %r8d +; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %rbx, %rcx +; X64-NEXT: movq %rbx, %r8 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: addq %r9, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r9, 
{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %r15, %rax +; X64-NEXT: movq %r11, %rax +; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %r14, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %r11, %rax +; X64-NEXT: adcq %r15, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r11, %rdi +; X64-NEXT: movq %r12, %rax +; X64-NEXT: adcq %r9, %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rbp, %r14 +; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbp, %rdi ; X64-NEXT: movq 8(%r10), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rsi, %r11 +; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: addq %rsi, %r12 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %rcx, %r11 +; X64-NEXT: addq %r8, %r12 ; X64-NEXT: adcq %rsi, %rbp ; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: setb %bl @@ -4380,92 +4381,91 @@ ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%r10), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: addq %rax, %r8 +; X64-NEXT: movq %r8, %rcx +; X64-NEXT: addq %rax, %rcx ; X64-NEXT: movq %rsi, %r10 ; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: addq %rbp, %r8 -; X64-NEXT: movq %r8, %rax +; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: adcq %rbx, %r10 -; X64-NEXT: movq 
%rcx, %rdx -; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: addq %r9, %rdx +; X64-NEXT: movq %r8, %rdx +; X64-NEXT: movq %r8, %r14 +; X64-NEXT: movq %r14, (%rsp) # 8-byte Spill +; X64-NEXT: addq %r11, %rdx ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r11, %r8 -; X64-NEXT: adcq %r8, %r15 +; X64-NEXT: movq %r12, %rsi +; X64-NEXT: adcq %rsi, %r15 ; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rax, %rcx +; X64-NEXT: adcq %rcx, %r9 +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, %r8 ; X64-NEXT: adcq %r10, %rdi ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq 40(%rsi), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: xorl %r14d, %r14d -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: addq %r9, %rdi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq 40(%rdi), %rax +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: xorl %r9d, %r9d +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: addq %r11, %rcx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %r13, %rdi -; X64-NEXT: adcq %r9, %rbp +; X64-NEXT: addq %r13, %rcx +; X64-NEXT: adcq %r11, %rbp ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp -; X64-NEXT: movzbl %bl, %r11d -; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: movq 48(%rsi), %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r14 +; X64-NEXT: movzbl %bl, %ebx +; X64-NEXT: adcq %rdx, %rbx +; X64-NEXT: movq 48(%rdi), %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %r9 +; X64-NEXT: movq 
%rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r13, %rbx -; X64-NEXT: addq %rax, %rbx -; X64-NEXT: movq %r9, %rsi -; X64-NEXT: adcq %rdx, %rsi -; X64-NEXT: addq %rbp, %rbx -; X64-NEXT: adcq %r11, %rsi -; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: addq %r13, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rdi, %r8 +; X64-NEXT: movq %r13, %r12 +; X64-NEXT: addq %rax, %r12 +; X64-NEXT: movq %r11, %rdi +; X64-NEXT: adcq %rdx, %rdi +; X64-NEXT: addq %rbp, %r12 +; X64-NEXT: adcq %rbx, %rdi +; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r13, %r14 +; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rcx, %rsi +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r12, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rbx, %rcx -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rsi, %r10 +; X64-NEXT: adcq %rdi, %r10 ; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, %rax ; X64-NEXT: addq %r13, %rax -; X64-NEXT: movq (%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %r9, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: adcq %r11, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r8, %r10 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 
8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: movq 56(%rax), %r11 ; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r10 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx @@ -4480,19 +4480,19 @@ ; X64-NEXT: setb %cl ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: movq %rdi, %r13 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %r12 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r10, %rbp +; X64-NEXT: movq %r9, %rbp ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -4504,65 +4504,63 @@ ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: 
adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r10 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rcx, %rsi ; X64-NEXT: movzbl %bl, %eax ; X64-NEXT: adcq %rax, %r13 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload -; X64-NEXT: addq %r9, %rsi +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload +; X64-NEXT: addq %r14, %rsi ; X64-NEXT: adcq %r8, %r13 ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r12 -; X64-NEXT: movq %r10, %rbx +; X64-NEXT: movq %r11, %rbx ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload -; X64-NEXT: mulq %r11 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 +; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: movq 24(%rax), %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq 24(%rax), %r9 ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %rbx -; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbp, %r8 ; X64-NEXT: adcq %rdi, %rcx -; X64-NEXT: setb %dil -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rbx +; X64-NEXT: setb %bl +; X64-NEXT: movq %r11, %rax 
+; X64-NEXT: mulq %r9 ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %dil, %ecx +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: addq %r14, %rbp -; X64-NEXT: movq (%rsp), %rbx # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: adcq %r9, %rbx +; X64-NEXT: movq %r10, %rbp +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: addq %r11, %rbp +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: adcq %r10, %rbx ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: addq %rsi, %r10 -; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %rsi, %r14 +; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %r13, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rbp @@ -4572,88 +4570,86 @@ ; X64-NEXT: setb %r15b ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r11, %rsi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload ; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %r11, %rdi +; X64-NEXT: addq %r14, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rdi, %r11 +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rdi, %r14 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %r8, %r12 +; X64-NEXT: 
mulq %r9 +; X64-NEXT: movq %r9, %r12 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: addq %r14, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: adcq %r9, %r14 +; X64-NEXT: addq %r11, %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: adcq %r10, %r9 ; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: adcq %rdx, %r9 ; X64-NEXT: addq %rbp, %r13 -; X64-NEXT: adcq %rbx, %r11 +; X64-NEXT: adcq %rbx, %r14 ; X64-NEXT: movzbl %r15b, %eax ; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: adcq $0, %r14 +; X64-NEXT: adcq $0, %r9 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload -; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload ; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload +; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: movq 24(%rax), %rcx ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r11 +; 
X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rbx, %rbp ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: mulq %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %rbp, %r15 ; X64-NEXT: adcq %rsi, %rbx ; X64-NEXT: setb %sil ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r8 ; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r11, %rbp +; X64-NEXT: movq %r9, %rbp ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -4665,8 +4661,8 @@ ; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -4674,7 +4670,7 @@ ; X64-NEXT: setb %cl ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: movq %rsi, %rbp -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rbx @@ -4686,11 +4682,11 @@ ; X64-NEXT: adcq %r15, %rsi ; X64-NEXT: adcq $0, %r8 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; 
X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %rbp, %r14 ; X64-NEXT: mulq %rdi @@ -4699,11 +4695,11 @@ ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbp, %rax -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %dil ; X64-NEXT: movq %r14, %rax @@ -4711,7 +4707,7 @@ ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq (%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload ; X64-NEXT: addq %r13, %rdi ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload @@ -4719,65 +4715,63 @@ ; X64-NEXT: adcq %r14, %rbp ; X64-NEXT: addq %rax, %rdi ; X64-NEXT: adcq %rdx, %rbp -; X64-NEXT: addq %rbx, %r9 -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rsi, %r11 +; X64-NEXT: addq %rbx, %r11 ; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rsi, %r9 +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %r8, %rdi ; X64-NEXT: adcq %r10, %rbp -; X64-NEXT: setb %r9b +; X64-NEXT: setb %r10b ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r10, %rbx +; X64-NEXT: addq %r8, %rbx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq 
%r12, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %rbx, %r15 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rbx, %r8 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %bl -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rcx -; X64-NEXT: addq %r13, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload -; X64-NEXT: movq %rbx, %rsi -; X64-NEXT: movq %rbx, %r12 -; X64-NEXT: adcq %r14, %rsi -; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq %r9, %r15 +; X64-NEXT: addq %r13, %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq %r12, %r13 +; X64-NEXT: adcq %r14, %r13 +; X64-NEXT: addq %rax, %r15 +; X64-NEXT: adcq %rdx, %r13 ; X64-NEXT: addq %rdi, %r11 -; X64-NEXT: adcq %rbp, %r15 -; X64-NEXT: movzbl %r9b, %eax -; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: adcq %rbp, %r8 +; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: adcq %rax, %r15 +; X64-NEXT: adcq $0, %r13 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload ; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) 
# 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: adcq $0, {{[0-9]+}}(%rsp) # 8-byte Folded Spill @@ -4788,104 +4782,106 @@ ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 -; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: movq %rcx, %rbp ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %rsi, %rcx ; X64-NEXT: adcq $0, %rbx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rbx, %rsi ; X64-NEXT: setb %cl -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r15 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq %r10, %r9 -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload ; X64-NEXT: movq %r12, %r10 ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r9 ; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, 
%rax +; X64-NEXT: mulq %rbp +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r12, %rbx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rcx, %r12 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdi, %rbp +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: adcq %rax, %r15 +; X64-NEXT: adcq %rax, %rdi ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: addq %r14, %rbx -; X64-NEXT: adcq %r8, %r15 +; X64-NEXT: adcq %r8, %rdi ; X64-NEXT: adcq $0, %r9 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %rbp, %rsi -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %r12, %r11 +; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rbp, %r8 ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %r14, %rcx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: movq 56(%rax), %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte 
Reload +; X64-NEXT: movq 56(%rax), %rsi +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rsi, %r11 +; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rcx, %r14 ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %cl ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload -; X64-NEXT: addq %r11, %rcx +; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: addq %r8, %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: adcq %r13, %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: adcq %r11, %rsi ; X64-NEXT: addq %rax, %rcx ; X64-NEXT: adcq %rdx, %rsi ; X64-NEXT: addq %rbx, %r12 -; X64-NEXT: adcq %r15, %r14 +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rdi, %r14 ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: addq %r9, %rcx @@ -4900,97 +4896,92 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r9, %rbx -; X64-NEXT: adcq $0, %r15 +; X64-NEXT: adcq $0, %r12 ; X64-NEXT: movq %rbp, %rax -; X64-NEXT: movq %r8, %rdi -; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rbx, %r8 -; X64-NEXT: adcq %r15, %r9 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %rbx, %rbp +; X64-NEXT: adcq %r12, %r9 ; X64-NEXT: setb %bl ; X64-NEXT: 
movq %r10, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: addq %r9, %rax ; X64-NEXT: movzbl %bl, %edi ; X64-NEXT: adcq %rdi, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload -; X64-NEXT: addq %r11, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: adcq %r13, %rbp -; X64-NEXT: addq %rax, %r15 -; X64-NEXT: adcq %rdx, %rbp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: addq %r8, %r12 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: adcq %r11, %r10 +; X64-NEXT: addq %rax, %r12 +; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: adcq %rsi, %r8 +; X64-NEXT: adcq %rsi, %rbp ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload -; X64-NEXT: adcq %rax, %r15 -; X64-NEXT: adcq $0, %rbp +; X64-NEXT: adcq %rax, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: addq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: adcq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: adcq %r15, {{[0-9]+}}(%rsp) # 8-byte Folded Spill +; X64-NEXT: adcq %r13, %r14 ; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rdx -; X64-NEXT: adcq $0, %r8 -; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %rbp +; X64-NEXT: adcq $0, %r12 +; X64-NEXT: adcq 
$0, %r10 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r10 +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r11, %rbx ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r12 -; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rbx, %r9 ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %bl -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: movq %rsi, %r13 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload ; 
X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r8 -; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: movq %rcx, %r14 +; X64-NEXT: adcq %rdx, %r15 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r10, %rdi +; X64-NEXT: movq %r14, %rdi ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -5002,80 +4993,75 @@ ; X64-NEXT: addq %r11, %rbx ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r13 -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rcx, %r14 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %rcx, %rdi +; X64-NEXT: movq %rax, %r13 +; X64-NEXT: addq %rcx, %r13 ; X64-NEXT: movzbl %bl, %eax ; X64-NEXT: adcq %rax, %r11 -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: adcq %r12, %r11 +; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload +; X64-NEXT: adcq %r9, %r11 ; X64-NEXT: adcq $0, %r8 -; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq $0, %r14 -; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r13, %rbx -; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq $0, %r15 +; 
X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r14, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r12 +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: movq %rsi, %r15 ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r10 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %r8, %rcx +; X64-NEXT: addq %rbx, %rcx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: mulq %r13 +; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rsi, %rbx ; X64-NEXT: setb %cl -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r13 -; X64-NEXT: movq %r13, %r9 +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: addq %r13, %rsi -; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: adcq %r14, %rcx -; X64-NEXT: addq %rax, %rsi +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: addq %rax, %r14 ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: addq %r13, %r9 +; X64-NEXT: movq %r9, %r13 ; X64-NEXT: adcq %r11, %r8 -; X64-NEXT: movq %r8, %r11 -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: movq %r8, %r15 +; X64-NEXT: adcq $0, %r14 ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rsi # 8-byte 
Folded Reload -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %r10, %rsi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload ; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %rsi @@ -5084,66 +5070,65 @@ ; X64-NEXT: addq %rcx, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %rdi, %r10 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rdi, %r9 ; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %bl +; X64-NEXT: setb %sil ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %rbx ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %bl, %ecx +; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: addq %r13, %rsi +; X64-NEXT: addq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq %r14, %rcx +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload -; X64-NEXT: adcq (%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: addq %r14, %r11 +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r9 # 8-byte 
Folded Reload ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: addq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %rax, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq %r15, %r12 -; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rbp, %r11 -; X64-NEXT: movq %r11, (%rsp) # 8-byte Spill +; X64-NEXT: adcq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill +; X64-NEXT: adcq %r12, %r13 +; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r10, %r15 +; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload -; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rax, %r11 +; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq $0, %r9 +; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq 64(%r9), %r11 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq 64(%rcx), %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq 
%rcx, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq 72(%rcx), %rsi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq 72(%r9), %rsi +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: movq %rdx, %rsi @@ -5151,10 +5136,10 @@ ; X64-NEXT: addq %rbx, %r8 ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %bl -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r10 -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, %r13 +; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %rsi, %rdi @@ -5165,142 +5150,139 @@ ; X64-NEXT: mulq %rdx ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload -; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: addq %rbx, %r10 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload ; X64-NEXT: adcq %r14, %r15 -; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: addq %rdi, %r10 ; X64-NEXT: adcq %rcx, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq %r12, %rax ; X64-NEXT: movq %r11, %rsi ; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq 
{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %r11, %rdi ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rbp, %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload +; X64-NEXT: adcq %r13, %r14 ; X64-NEXT: addq %rax, %rbx ; X64-NEXT: adcq %rdx, %r14 -; X64-NEXT: addq %r13, %rbx +; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq %r8, %r14 -; X64-NEXT: adcq $0, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: adcq $0, %r15 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq 80(%rbp), %rdi -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq 80(%r9), %rdi +; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %r8, %rcx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq 88(%rbp), %r10 -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq 88(%r9), %r9 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rsi, %rbp -; X64-NEXT: setb %r11b -; 
X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: setb %r12b +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rbp, %rsi -; X64-NEXT: movzbl %r11b, %eax +; X64-NEXT: movzbl %r12b, %eax ; X64-NEXT: adcq %rax, %rcx ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: mulq %rdx -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: addq %r9, %rbp +; X64-NEXT: movq %rdx, %r12 +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %rsi, %rbp -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: addq %rbx, %r13 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r11, %rax +; X64-NEXT: adcq %r12, %r13 +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: adcq %rcx, %r13 +; X64-NEXT: addq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: adcq %r14, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq $0, %rbp ; X64-NEXT: adcq $0, %rax -; X64-NEXT: addq %r12, %rbp -; X64-NEXT: movq %rbp, %r8 -; X64-NEXT: adcq %r15, %rax -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: adcq $0, %r13 +; X64-NEXT: addq %r10, %rax +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: adcq %r15, %r13 ; X64-NEXT: setb %r14b -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r15, %rbx +; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq 
%rcx, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %sil -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: setb %bl +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %sil, %ecx +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: addq %r9, %rsi +; X64-NEXT: addq %r11, %rsi ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: adcq %r12, %rcx ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %r8, %r12 -; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %r11, %rbx -; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r8, %r15 +; X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r13, %rbp +; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movzbl %r14b, %eax ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: imulq %rax, %r10 -; X64-NEXT: movq %rax, %r14 +; X64-NEXT: imulq %rax, %r9 +; X64-NEXT: movq %rax, %r10 ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %r10, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: addq %r9, %rdx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload ; X64-NEXT: imulq %rbp, %rdi ; X64-NEXT: addq %rdx, %rdi ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload @@ -5311,7 +5293,7 @@ ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax 
# 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: imulq %rcx, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %r8, %r9 @@ -5319,11 +5301,11 @@ ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, %rdi -; X64-NEXT: mulq %r14 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r14 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx @@ -5344,12 +5326,11 @@ ; X64-NEXT: adcq %rax, %r12 ; X64-NEXT: addq %r9, %r13 ; X64-NEXT: adcq %r8, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq 120(%rdx), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq 120(%rbp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload ; X64-NEXT: imulq %r10, %rcx -; X64-NEXT: movq 112(%rdx), %rsi -; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq 112(%rbp), %rsi ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r11 @@ -5365,7 +5346,7 @@ ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %r11, %r9 @@ -5404,49 +5385,48 @@ ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq 80(%rsi), %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 
8-byte Reload +; X64-NEXT: movq 80(%r9), %rsi +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq 88(%rsi), %rax -; X64-NEXT: movq %rsi, %r9 -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq 88(%r9), %r8 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r8, %rbx +; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rbx, %r14 ; X64-NEXT: adcq %rbp, %rcx -; X64-NEXT: setb %r8b -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: setb %r10b +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx -; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %rbp -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq (%rsp), %r12 # 8-byte Reload ; X64-NEXT: addq %r12, %rsi ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload @@ -5459,8 +5439,8 @@ ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq 72(%rdi), 
%r9 -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq 72(%rdi), %rdi +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx @@ -5470,11 +5450,10 @@ ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %r11b -; X64-NEXT: movq %r9, %rax -; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rbp @@ -5492,20 +5471,20 @@ ; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: adcq %rbx, %r8 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill ; X64-NEXT: adcq %r14, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp @@ -5518,74 +5497,72 @@ ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %dil -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %rbx ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: addq %r14, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: 
adcq %r13, %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: addq %r9, %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: adcq %r8, %r11 ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq (%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, (%rsp) # 8-byte Spill ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload ; X64-NEXT: movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r11 ; X64-NEXT: addq %rsi, %r15 ; X64-NEXT: adcq %r10, %r11 -; X64-NEXT: setb %r10b -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %r8, %rdi -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r12 +; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r14, %rsi +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r10, %rbx ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: adcq %rdi, %rcx -; X64-NEXT: setb %r8b -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rsi -; 
X64-NEXT: movq %rsi, %rdi -; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %r8b, %ecx +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: adcq %rdi, %r10 +; X64-NEXT: setb %bl +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rcx, %rdi +; X64-NEXT: addq %r10, %rax +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: addq %r14, %rsi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: addq %r9, %rbx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq %r13, %rcx -; X64-NEXT: addq %rax, %rsi +; X64-NEXT: adcq %r8, %rcx +; X64-NEXT: addq %rax, %rbx ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %r15, %r9 -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %r11, %rbx -; X64-NEXT: movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movzbl %r10b, %eax -; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r15, %rbp +; X64-NEXT: movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r11, %r12 +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload +; X64-NEXT: adcq %rax, %rbx +; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload ; X64-NEXT: movq 96(%rbp), %rcx ; X64-NEXT: imulq %rcx, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r12, %rsi ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rdi, %rdx @@ -5611,7 +5588,7 @@ ; X64-NEXT: movq %rbx, %rsi ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %rbp, %r9 ; X64-NEXT: mulq %rcx 
@@ -5636,32 +5613,31 @@ ; X64-NEXT: addq %r10, %rbp ; X64-NEXT: adcq %rdi, %rbx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: imulq %rax, %rsi -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rax, %r8 +; X64-NEXT: imulq %r13, %rsi +; X64-NEXT: movq %r13, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload -; X64-NEXT: imulq %r11, %rcx -; X64-NEXT: addq %rdx, %rcx -; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: imulq %r11, %r8 +; X64-NEXT: addq %rdx, %r8 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload ; X64-NEXT: imulq %r15, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: mulq %r14 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r10 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: imulq %r14, %rax +; X64-NEXT: movq %r14, %rax +; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: addq %rdx, %rax -; X64-NEXT: addq %r8, %r10 -; X64-NEXT: adcq %r9, %rax +; X64-NEXT: addq %r9, %r10 +; X64-NEXT: adcq %r8, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %r14 ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r8 @@ -5685,7 +5661,7 @@ ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq %r10, %rax ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload +; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq %r12, 
%rsi ; X64-NEXT: adcq %rbp, %rax ; X64-NEXT: adcq %rbx, %rdx @@ -5695,23 +5671,23 @@ ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: movq (%rsp), %rbp # 8-byte Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload ; X64-NEXT: movq %rcx, %r9 ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, %r10 -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload -; X64-NEXT: adcq (%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload Index: test/CodeGen/X86/mul-i512.ll =================================================================== --- test/CodeGen/X86/mul-i512.ll 
+++ test/CodeGen/X86/mul-i512.ll @@ -625,20 +625,20 @@ ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: addl %eax, %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: adcl %eax, %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: adcl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: addl %edx, %ebx -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, %esi +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: movl %eax, %ecx ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload @@ -653,14 +653,13 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: adcl $0, %eax +; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; 
X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload @@ -1046,14 +1045,14 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r15, %rbx +; X64-NEXT: addq %r10, %rbx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rdi @@ -1062,7 +1061,7 @@ ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx @@ -1134,12 +1133,11 @@ ; X64-NEXT: adcq %rax, %r11 ; X64-NEXT: addq %r14, %r9 ; X64-NEXT: adcq %rbx, %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq 56(%rdx), %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq 56(%rbp), %rcx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload ; X64-NEXT: imulq %r10, %rcx -; X64-NEXT: movq 48(%rdx), %rbx -; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq 48(%rbp), %rbx ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rax, %rsi Index: test/CodeGen/X86/mul128.ll =================================================================== --- test/CodeGen/X86/mul128.ll +++ test/CodeGen/X86/mul128.ll @@ -5,8 +5,8 @@ ; X64-LABEL: foo: ; X64: # BB#0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: imulq %rdi, %rcx ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: imulq %rax, %rcx ; X64-NEXT: mulq %r8 ; X64-NEXT: addq %rcx, %rdx ; 
X64-NEXT: imulq %r8, %rsi Index: test/CodeGen/X86/mul64.ll =================================================================== --- test/CodeGen/X86/mul64.ll +++ test/CodeGen/X86/mul64.ll @@ -19,8 +19,8 @@ ; ; X64-LABEL: foo: ; X64: # BB#0: -; X64-NEXT: imulq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: imulq %rsi, %rax ; X64-NEXT: retq %k = mul i64 %t, %u ret i64 %k Index: test/CodeGen/X86/mwaitx.ll =================================================================== --- test/CodeGen/X86/mwaitx.ll +++ test/CodeGen/X86/mwaitx.ll @@ -4,8 +4,8 @@ ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=bdver4 | FileCheck %s -check-prefix=WIN64 ; CHECK-LABEL: foo: -; CHECK: leaq (%rdi), %rax -; CHECK-NEXT: movl %esi, %ecx +; CHECK: movl %esi, %ecx +; CHECK-NEXT: leaq (%rdi), %rax ; CHECK-NEXT: monitorx ; WIN64-LABEL: foo: ; WIN64: leaq (%rcx), %rax @@ -21,13 +21,13 @@ declare void @llvm.x86.monitorx(i8*, i32, i32) nounwind ; CHECK-LABEL: bar: -; CHECK: movl %edi, %ecx +; CHECK: movl %edx, %ebx ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: movl %edx, %ebx +; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: mwaitx ; WIN64-LABEL: bar: -; WIN64: movl %edx, %eax ; WIN64: movl %r8d, %ebx +; WIN64: movl %edx, %eax ; WIN64-NEXT: mwaitx define void @bar(i32 %E, i32 %H, i32 %C) nounwind { entry: Index: test/CodeGen/X86/negate-i1.ll =================================================================== --- test/CodeGen/X86/negate-i1.ll +++ test/CodeGen/X86/negate-i1.ll @@ -5,9 +5,10 @@ define i8 @select_i8_neg1_or_0(i1 %a) { ; X64-LABEL: select_i8_neg1_or_0: ; X64: # BB#0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: negb %al +; X64-NEXT: # kill: %AL %AL %EAX ; X64-NEXT: retq ; ; X32-LABEL: select_i8_neg1_or_0: @@ -23,8 +24,9 @@ define i8 @select_i8_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i8_neg1_or_0_zeroext: ; X64: # BB#0: -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negb %al +; X64-NEXT: # 
kill: %AL %AL %EAX ; X64-NEXT: retq ; ; X32-LABEL: select_i8_neg1_or_0_zeroext: @@ -39,9 +41,10 @@ define i16 @select_i16_neg1_or_0(i1 %a) { ; X64-LABEL: select_i16_neg1_or_0: ; X64: # BB#0: -; X64-NEXT: andl $1, %edi -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negl %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq ; ; X32-LABEL: select_i16_neg1_or_0: @@ -58,8 +61,9 @@ define i16 @select_i16_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i16_neg1_or_0_zeroext: ; X64: # BB#0: -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq ; ; X32-LABEL: select_i16_neg1_or_0_zeroext: @@ -75,9 +79,9 @@ define i32 @select_i32_neg1_or_0(i1 %a) { ; X64-LABEL: select_i32_neg1_or_0: ; X64: # BB#0: -; X64-NEXT: andl $1, %edi -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq ; ; X32-LABEL: select_i32_neg1_or_0: @@ -93,8 +97,8 @@ define i32 @select_i32_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i32_neg1_or_0_zeroext: ; X64: # BB#0: -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq ; ; X32-LABEL: select_i32_neg1_or_0_zeroext: @@ -109,10 +113,9 @@ define i64 @select_i64_neg1_or_0(i1 %a) { ; X64-LABEL: select_i64_neg1_or_0: ; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: andl $1, %edi -; X64-NEXT: negq %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negq %rax ; X64-NEXT: retq ; ; X32-LABEL: select_i64_neg1_or_0: Index: test/CodeGen/X86/negate-shift.ll =================================================================== --- test/CodeGen/X86/negate-shift.ll +++ test/CodeGen/X86/negate-shift.ll @@ -4,8 +4,8 @@ define i32 @neg_lshr_signbit(i32 %x) { ; X64-LABEL: neg_lshr_signbit: ; X64: # BB#0: -; X64-NEXT: sarl $31, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarl $31, 
%eax ; X64-NEXT: retq %sh = lshr i32 %x, 31 %neg = sub i32 0, %sh @@ -15,8 +15,8 @@ define i64 @neg_ashr_signbit(i64 %x) { ; X64-LABEL: neg_ashr_signbit: ; X64: # BB#0: -; X64-NEXT: shrq $63, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $63, %rax ; X64-NEXT: retq %sh = ashr i64 %x, 63 %neg = sub i64 0, %sh Index: test/CodeGen/X86/negate.ll =================================================================== --- test/CodeGen/X86/negate.ll +++ test/CodeGen/X86/negate.ll @@ -42,8 +42,9 @@ define i8 @negate_zero_or_minsigned(i8 %x) { ; CHECK-LABEL: negate_zero_or_minsigned: ; CHECK: # BB#0: -; CHECK-NEXT: shlb $7, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $7, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %signbit = shl i8 %x, 7 %neg = sub i8 0, %signbit Index: test/CodeGen/X86/no-sse2-avg.ll =================================================================== --- test/CodeGen/X86/no-sse2-avg.ll +++ test/CodeGen/X86/no-sse2-avg.ll @@ -5,9 +5,9 @@ define <16 x i8> @PR27973() { ; CHECK-LABEL: PR27973: ; CHECK: # BB#0: -; CHECK-NEXT: movq $0, 8(%rdi) -; CHECK-NEXT: movq $0, (%rdi) ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq $0, 8(%rax) +; CHECK-NEXT: movq $0, (%rax) ; CHECK-NEXT: retq %t0 = zext <16 x i8> zeroinitializer to <16 x i32> %t1 = add nuw nsw <16 x i32> %t0, Index: test/CodeGen/X86/not-and-simplify.ll =================================================================== --- test/CodeGen/X86/not-and-simplify.ll +++ test/CodeGen/X86/not-and-simplify.ll @@ -7,9 +7,9 @@ define i32 @shrink_xor_constant1(i32 %x) { ; ALL-LABEL: shrink_xor_constant1: ; ALL: # BB#0: -; ALL-NEXT: shrl $31, %edi -; ALL-NEXT: xorl $1, %edi ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: shrl $31, %eax +; ALL-NEXT: xorl $1, %eax ; ALL-NEXT: retq %sh = lshr i32 %x, 31 %not = xor i32 %sh, -1 @@ -34,9 +34,10 @@ define i8 @shrink_xor_constant2(i8 %x) { ; ALL-LABEL: shrink_xor_constant2: ; ALL: # BB#0: -; ALL-NEXT: shlb $5, %dil -; ALL-NEXT: xorb $-32, %dil ; ALL-NEXT: 
movl %edi, %eax +; ALL-NEXT: shlb $5, %al +; ALL-NEXT: xorb $-32, %al +; ALL-NEXT: # kill: %AL %AL %EAX ; ALL-NEXT: retq %sh = shl i8 %x, 5 %not = xor i8 %sh, -1 Index: test/CodeGen/X86/palignr.ll =================================================================== --- test/CodeGen/X86/palignr.ll +++ test/CodeGen/X86/palignr.ll @@ -167,16 +167,15 @@ ; CHECK-SSE2-LABEL: test9: ; CHECK-SSE2: # BB#0: ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 -; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero -; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; CHECK-SSE2-NEXT: por %xmm0, %xmm1 -; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 +; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero +; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] +; CHECK-SSE2-NEXT: por %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retl ; ; CHECK-SSSE3-LABEL: test9: ; CHECK-SSSE3: # BB#0: -; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] ; CHECK-SSSE3-NEXT: movdqa %xmm1, %xmm0 +; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] ; CHECK-SSSE3-NEXT: retl ; ; CHECK-AVX-LABEL: test9: Index: test/CodeGen/X86/peep-setb.ll =================================================================== --- test/CodeGen/X86/peep-setb.ll +++ test/CodeGen/X86/peep-setb.ll @@ -7,9 +7,10 @@ define i8 @test1(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test1: ; CHECK: # BB#0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: adcb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: adcb $0, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = zext i1 %cmp to i8 @@ -20,9 +21,9 @@ define i32 @test2(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test2: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl %esi, %edi -; 
CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = zext i1 %cmp to i32 @@ -33,9 +34,9 @@ define i64 @test3(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test3: ; CHECK: # BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: adcq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: adcq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = zext i1 %cmp to i64 @@ -46,9 +47,10 @@ define i8 @test4(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test4: ; CHECK: # BB#0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: sbbb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: sbbb $0, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = zext i1 %cmp to i8 @@ -59,9 +61,9 @@ define i32 @test5(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test5: ; CHECK: # BB#0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = zext i1 %cmp to i32 @@ -72,9 +74,9 @@ define i64 @test6(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test6: ; CHECK: # BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: sbbq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: sbbq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = zext i1 %cmp to i64 @@ -85,9 +87,10 @@ define i8 @test7(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test7: ; CHECK: # BB#0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: adcb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: adcb $0, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = sext i1 %cmp to i8 @@ -98,9 +101,9 @@ define i32 @test8(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test8: ; CHECK: # BB#0: -; CHECK-NEXT: 
cmpl %esi, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = sext i1 %cmp to i32 @@ -111,9 +114,9 @@ define i64 @test9(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test9: ; CHECK: # BB#0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: adcq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: adcq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = sext i1 %cmp to i64 Index: test/CodeGen/X86/pku.ll =================================================================== --- test/CodeGen/X86/pku.ll +++ test/CodeGen/X86/pku.ll @@ -5,9 +5,9 @@ define void @test_x86_wrpkru(i32 %src) { ; CHECK-LABEL: test_x86_wrpkru: ; CHECK: ## BB#0: +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: wrpkru ; CHECK-NEXT: retq call void @llvm.x86.wrpkru(i32 %src) Index: test/CodeGen/X86/pr12312.ll =================================================================== --- test/CodeGen/X86/pr12312.ll +++ test/CodeGen/X86/pr12312.ll @@ -177,16 +177,16 @@ define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel128: ; SSE41: # BB#0: -; SSE41-NEXT: ptest %xmm0, %xmm0 -; SSE41-NEXT: cmovel %esi, %edi ; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: ptest %xmm0, %xmm0 +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: vecsel128: ; AVX: # BB#0: -; AVX-NEXT: vptest %xmm0, %xmm0 -; AVX-NEXT: cmovel %esi, %edi ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: vptest %xmm0, %xmm0 +; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: retq %t0 = bitcast <4 x i32> %input to i128 %t1 = icmp ne i128 %t0, 0 @@ -197,17 +197,17 @@ define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel256: ; SSE41: # BB#0: +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: ptest %xmm0, %xmm0 -; SSE41-NEXT: cmovel %esi, 
%edi -; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: vecsel256: ; AVX: # BB#0: -; AVX-NEXT: vptest %ymm0, %ymm0 -; AVX-NEXT: cmovel %esi, %edi ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: vptest %ymm0, %ymm0 +; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq %t0 = bitcast <8 x i32> %input to i256 @@ -219,20 +219,20 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel512: ; SSE41: # BB#0: +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: por %xmm3, %xmm1 ; SSE41-NEXT: por %xmm2, %xmm1 ; SSE41-NEXT: por %xmm0, %xmm1 ; SSE41-NEXT: ptest %xmm1, %xmm1 -; SSE41-NEXT: cmovel %esi, %edi -; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: vecsel512: ; AVX: # BB#0: +; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vptest %ymm0, %ymm0 -; AVX-NEXT: cmovel %esi, %edi -; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq %t0 = bitcast <16 x i32> %input to i512 Index: test/CodeGen/X86/pr15705.ll =================================================================== --- test/CodeGen/X86/pr15705.ll +++ test/CodeGen/X86/pr15705.ll @@ -22,14 +22,14 @@ ; ; X64-LABEL: PR15705: ; X64: # BB#0: # %entry +; X64-NEXT: movl %edx, %eax ; X64-NEXT: cmpl %esi, %edi ; X64-NEXT: je .LBB0_2 ; X64-NEXT: # BB#1: # %if.end -; X64-NEXT: cmpl %edx, %edi +; X64-NEXT: cmpl %eax, %edi ; X64-NEXT: cmovel %ecx, %esi -; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %esi, %eax ; X64-NEXT: .LBB0_2: # %return -; X64-NEXT: movl %edx, %eax ; X64-NEXT: retq entry: %cmp = icmp eq i32 %x, %a Index: test/CodeGen/X86/pr15981.ll =================================================================== --- test/CodeGen/X86/pr15981.ll +++ test/CodeGen/X86/pr15981.ll @@ -19,9 +19,9 @@ ; ; X64-LABEL: fn1: ; X64: # BB#0: -; X64-NEXT: testl %esi, %esi -; X64-NEXT: cmovel %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: 
testl %esi, %esi +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %3 = icmp ne i32 %1, 0 %4 = select i1 %3, i32 %0, i32 0 Index: test/CodeGen/X86/pr23664.ll =================================================================== --- test/CodeGen/X86/pr23664.ll +++ test/CodeGen/X86/pr23664.ll @@ -7,8 +7,9 @@ ret i2 %or ; CHECK-LABEL: f: -; CHECK: addb %dil, %dil -; CHECK-NEXT: orb $1, %dil -; CHECK-NEXT: movl %edi, %eax +; CHECK: movl %edi, %eax +; CHECK-NEXT: addb %al, %al +; CHECK-NEXT: orb $1, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq } Index: test/CodeGen/X86/pr28173.ll =================================================================== --- test/CodeGen/X86/pr28173.ll +++ test/CodeGen/X86/pr28173.ll @@ -78,8 +78,9 @@ define i8 @foo8(i1 zeroext %i) #0 { ; CHECK-LABEL: foo8: ; CHECK: # BB#0: -; CHECK-NEXT: orb $-2, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orb $-2, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq br label %bb Index: test/CodeGen/X86/rot16.ll =================================================================== --- test/CodeGen/X86/rot16.ll +++ test/CodeGen/X86/rot16.ll @@ -13,8 +13,10 @@ ; X64-LABEL: foo: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %di, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shldw %cl, %ax, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = shl i16 %x, %z %t1 = sub i16 16, %z @@ -35,8 +37,10 @@ ; X64-LABEL: bar: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shldw %cl, %di, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = shl i16 %y, %z %t1 = sub i16 16, %z @@ -56,8 +60,10 @@ ; X64-LABEL: un: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %di, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrdw %cl, %ax, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = 
lshr i16 %x, %z %t1 = sub i16 16, %z @@ -78,8 +84,10 @@ ; X64-LABEL: bu: ; X64: # BB#0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shrdw %cl, %di, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = lshr i16 %y, %z %t1 = sub i16 16, %z @@ -97,8 +105,9 @@ ; ; X64-LABEL: xfoo: ; X64: # BB#0: -; X64-NEXT: rolw $5, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $5, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = lshr i16 %x, 11 %t1 = shl i16 %x, 5 @@ -116,8 +125,9 @@ ; ; X64-LABEL: xbar: ; X64: # BB#0: -; X64-NEXT: shldw $5, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: shldw $5, %di, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = shl i16 %y, 5 %t1 = lshr i16 %x, 11 @@ -134,8 +144,9 @@ ; ; X64-LABEL: xun: ; X64: # BB#0: -; X64-NEXT: rolw $11, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $11, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = lshr i16 %x, 5 %t1 = shl i16 %x, 11 @@ -153,8 +164,9 @@ ; ; X64-LABEL: xbu: ; X64: # BB#0: -; X64-NEXT: shldw $11, %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shldw $11, %si, %ax +; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %t0 = lshr i16 %y, 5 %t1 = shl i16 %x, 11 Index: test/CodeGen/X86/rot64.ll =================================================================== --- test/CodeGen/X86/rot64.ll +++ test/CodeGen/X86/rot64.ll @@ -6,9 +6,10 @@ define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: foo: ; ALL: # BB#0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: rolq %cl, %rdi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: # kill: %CL %CL %RCX +; ALL-NEXT: rolq %cl, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %x, %z @@ -21,9 +22,10 @@ define i64 @bar(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: bar: ; ALL: # BB#0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: shldq %cl, %rdi, %rsi +; ALL-NEXT: movq %rdx, %rcx ; 
ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: # kill: %CL %CL %RCX +; ALL-NEXT: shldq %cl, %rdi, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %y, %z @@ -36,9 +38,10 @@ define i64 @un(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: un: ; ALL: # BB#0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: rorq %cl, %rdi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: # kill: %CL %CL %RCX +; ALL-NEXT: rorq %cl, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %x, %z @@ -51,9 +54,10 @@ define i64 @bu(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: bu: ; ALL: # BB#0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: shrdq %cl, %rdi, %rsi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: # kill: %CL %CL %RCX +; ALL-NEXT: shrdq %cl, %rdi, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %y, %z @@ -66,14 +70,14 @@ define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone { ; X64-LABEL: xfoo: ; X64: # BB#0: # %entry -; X64-NEXT: rolq $7, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $7, %rax ; X64-NEXT: retq ; ; SHLD-LABEL: xfoo: ; SHLD: # BB#0: # %entry -; SHLD-NEXT: shldq $7, %rdi, %rdi ; SHLD-NEXT: movq %rdi, %rax +; SHLD-NEXT: shldq $7, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: xfoo: @@ -115,8 +119,8 @@ define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: xbar: ; ALL: # BB#0: # %entry -; ALL-NEXT: shrdq $57, %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: shrdq $57, %rsi, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %y, 7 @@ -128,14 +132,14 @@ define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone { ; X64-LABEL: xun: ; X64: # BB#0: # %entry -; X64-NEXT: rolq $57, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $57, %rax ; X64-NEXT: retq ; ; SHLD-LABEL: xun: ; SHLD: # BB#0: # %entry -; SHLD-NEXT: shldq $57, %rdi, %rdi ; SHLD-NEXT: movq %rdi, %rax +; SHLD-NEXT: shldq $57, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: xun: @@ -177,8 +181,8 @@ define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone { ; 
ALL-LABEL: xbu: ; ALL: # BB#0: # %entry -; ALL-NEXT: shldq $57, %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: shldq $57, %rsi, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %y, 7 Index: test/CodeGen/X86/rotate.ll =================================================================== --- test/CodeGen/X86/rotate.ll +++ test/CodeGen/X86/rotate.ll @@ -43,8 +43,9 @@ ; 64-LABEL: rotl64: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rolq %cl, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rolq %cl, %rax ; 64-NEXT: retq %shift.upgrd.1 = zext i8 %Amt to i64 %B = shl i64 %A, %shift.upgrd.1 @@ -96,8 +97,9 @@ ; 64-LABEL: rotr64: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorq %cl, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rorq %cl, %rax ; 64-NEXT: retq %shift.upgrd.3 = zext i8 %Amt to i64 %B = lshr i64 %A, %shift.upgrd.3 @@ -120,8 +122,8 @@ ; ; 64-LABEL: rotli64: ; 64: # BB#0: -; 64-NEXT: rolq $5, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rolq $5, %rax ; 64-NEXT: retq %B = shl i64 %A, 5 %C = lshr i64 %A, 59 @@ -141,8 +143,8 @@ ; ; 64-LABEL: rotri64: ; 64: # BB#0: -; 64-NEXT: rolq $59, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rolq $59, %rax ; 64-NEXT: retq %B = lshr i64 %A, 5 %C = shl i64 %A, 59 @@ -162,8 +164,8 @@ ; ; 64-LABEL: rotl1_64: ; 64: # BB#0: -; 64-NEXT: rolq %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rolq %rax ; 64-NEXT: retq %B = shl i64 %A, 1 %C = lshr i64 %A, 63 @@ -183,8 +185,8 @@ ; ; 64-LABEL: rotr1_64: ; 64: # BB#0: -; 64-NEXT: rorq %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rorq %rax ; 64-NEXT: retq %B = shl i64 %A, 63 %C = lshr i64 %A, 1 @@ -203,8 +205,9 @@ ; 64-LABEL: rotl32: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: roll %cl, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: roll %cl, %eax ; 64-NEXT: retq %shift.upgrd.1 = zext i8 %Amt to i32 %B = shl i32 %A, %shift.upgrd.1 @@ -226,8 +229,9 @@ ; 64-LABEL: rotr32: ; 64: # BB#0: ; 
64-NEXT: movl %esi, %ecx -; 64-NEXT: rorl %cl, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rorl %cl, %eax ; 64-NEXT: retq %shift.upgrd.3 = zext i8 %Amt to i32 %B = lshr i32 %A, %shift.upgrd.3 @@ -247,8 +251,8 @@ ; ; 64-LABEL: rotli32: ; 64: # BB#0: -; 64-NEXT: roll $5, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: roll $5, %eax ; 64-NEXT: retq %B = shl i32 %A, 5 %C = lshr i32 %A, 27 @@ -265,8 +269,8 @@ ; ; 64-LABEL: rotri32: ; 64: # BB#0: -; 64-NEXT: roll $27, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: roll $27, %eax ; 64-NEXT: retq %B = lshr i32 %A, 5 %C = shl i32 %A, 27 @@ -283,8 +287,8 @@ ; ; 64-LABEL: rotl1_32: ; 64: # BB#0: -; 64-NEXT: roll %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: roll %eax ; 64-NEXT: retq %B = shl i32 %A, 1 %C = lshr i32 %A, 31 @@ -301,8 +305,8 @@ ; ; 64-LABEL: rotr1_32: ; 64: # BB#0: -; 64-NEXT: rorl %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rorl %eax ; 64-NEXT: retq %B = shl i32 %A, 31 %C = lshr i32 %A, 1 @@ -321,8 +325,10 @@ ; 64-LABEL: rotl16: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rolw %cl, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rolw %cl, %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %shift.upgrd.5 = zext i8 %Amt to i16 %B = shl i16 %A, %shift.upgrd.5 @@ -344,8 +350,10 @@ ; 64-LABEL: rotr16: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorw %cl, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rorw %cl, %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %shift.upgrd.7 = zext i8 %Amt to i16 %B = lshr i16 %A, %shift.upgrd.7 @@ -365,8 +373,9 @@ ; ; 64-LABEL: rotli16: ; 64: # BB#0: -; 64-NEXT: rolw $5, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolw $5, %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %B = shl i16 %A, 5 %C = lshr i16 %A, 11 @@ -383,8 +392,9 @@ ; ; 64-LABEL: rotri16: ; 64: # BB#0: -; 64-NEXT: rolw $11, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolw $11, %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 
64-NEXT: retq %B = lshr i16 %A, 5 %C = shl i16 %A, 11 @@ -401,8 +411,9 @@ ; ; 64-LABEL: rotl1_16: ; 64: # BB#0: -; 64-NEXT: rolw %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolw %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %B = shl i16 %A, 1 %C = lshr i16 %A, 15 @@ -419,8 +430,9 @@ ; ; 64-LABEL: rotr1_16: ; 64: # BB#0: -; 64-NEXT: rorw %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rorw %ax +; 64-NEXT: # kill: %AX %AX %EAX ; 64-NEXT: retq %B = lshr i16 %A, 1 %C = shl i16 %A, 15 @@ -439,8 +451,10 @@ ; 64-LABEL: rotl8: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rolb %cl, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rolb %cl, %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = shl i8 %A, %Amt %Amt2 = sub i8 8, %Amt @@ -460,8 +474,10 @@ ; 64-LABEL: rotr8: ; 64: # BB#0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorb %cl, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: %CL %CL %ECX +; 64-NEXT: rorb %cl, %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = lshr i8 %A, %Amt %Amt2 = sub i8 8, %Amt @@ -479,8 +495,9 @@ ; ; 64-LABEL: rotli8: ; 64: # BB#0: -; 64-NEXT: rolb $5, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolb $5, %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = shl i8 %A, 5 %C = lshr i8 %A, 3 @@ -497,8 +514,9 @@ ; ; 64-LABEL: rotri8: ; 64: # BB#0: -; 64-NEXT: rolb $3, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolb $3, %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = lshr i8 %A, 5 %C = shl i8 %A, 3 @@ -515,8 +533,9 @@ ; ; 64-LABEL: rotl1_8: ; 64: # BB#0: -; 64-NEXT: rolb %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolb %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = shl i8 %A, 1 %C = lshr i8 %A, 7 @@ -533,8 +552,9 @@ ; ; 64-LABEL: rotr1_8: ; 64: # BB#0: -; 64-NEXT: rorb %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rorb %al +; 64-NEXT: # kill: %AL %AL %EAX ; 64-NEXT: retq %B = lshr i8 %A, 1 %C = shl i8 %A, 7 Index: test/CodeGen/X86/rotate4.ll 
=================================================================== --- test/CodeGen/X86/rotate4.ll +++ test/CodeGen/X86/rotate4.ll @@ -8,8 +8,9 @@ ; CHECK-LABEL: rotate_left_32: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: roll %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: roll %cl, %eax ; CHECK-NEXT: retq %and = and i32 %b, 31 %shl = shl i32 %a, %and @@ -24,8 +25,9 @@ ; CHECK-LABEL: rotate_right_32: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorl %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: rorl %cl, %eax ; CHECK-NEXT: retq %and = and i32 %b, 31 %shl = lshr i32 %a, %and @@ -39,9 +41,10 @@ define i64 @rotate_left_64(i64 %a, i64 %b) { ; CHECK-LABEL: rotate_left_64: ; CHECK: # BB#0: -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rolq %cl, %rdi +; CHECK-NEXT: movq %rsi, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: rolq %cl, %rax ; CHECK-NEXT: retq %and = and i64 %b, 63 %shl = shl i64 %a, %and @@ -55,9 +58,10 @@ define i64 @rotate_right_64(i64 %a, i64 %b) { ; CHECK-LABEL: rotate_right_64: ; CHECK: # BB#0: -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorq %cl, %rdi +; CHECK-NEXT: movq %rsi, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: rorq %cl, %rax ; CHECK-NEXT: retq %and = and i64 %b, 63 %shl = lshr i64 %a, %and @@ -74,6 +78,7 @@ ; CHECK-LABEL: rotate_left_m32: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: roll %cl, (%rdi) ; CHECK-NEXT: retq %a = load i32, i32* %pa, align 16 @@ -91,6 +96,7 @@ ; CHECK-LABEL: rotate_right_m32: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: rorl %cl, (%rdi) ; CHECK-NEXT: retq %a = load i32, i32* %pa, align 16 @@ -107,7 +113,8 @@ define void @rotate_left_m64(i64 *%pa, i64 %b) { ; CHECK-LABEL: rotate_left_m64: ; CHECK: # BB#0: -; 
CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: # kill: %CL %CL %RCX ; CHECK-NEXT: rolq %cl, (%rdi) ; CHECK-NEXT: retq %a = load i64, i64* %pa, align 16 @@ -124,7 +131,8 @@ define void @rotate_right_m64(i64 *%pa, i64 %b) { ; CHECK-LABEL: rotate_right_m64: ; CHECK: # BB#0: -; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: # kill: %CL %CL %RCX ; CHECK-NEXT: rorq %cl, (%rdi) ; CHECK-NEXT: retq %a = load i64, i64* %pa, align 16 @@ -145,8 +153,10 @@ ; CHECK-LABEL: rotate_left_8: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rolb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: rolb %cl, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %amt = trunc i32 %amount to i8 %sub = sub i8 0, %amt @@ -162,8 +172,10 @@ ; CHECK-LABEL: rotate_right_8: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: rorb %cl, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %amt = trunc i32 %amount to i8 %sub = sub i8 0, %amt @@ -179,8 +191,10 @@ ; CHECK-LABEL: rotate_left_16: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rolw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: rolw %cl, %ax +; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq %amt = trunc i32 %amount to i16 %sub = sub i16 0, %amt @@ -196,8 +210,10 @@ ; CHECK-LABEL: rotate_right_16: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: %CL %CL %ECX +; CHECK-NEXT: rorw %cl, %ax +; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq %amt = trunc i32 %amount to i16 %sub = sub i16 0, %amt @@ -213,6 +229,7 @@ ; CHECK-LABEL: rotate_left_m8: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: rolb %cl, (%rdi) ; CHECK-NEXT: retq %x = 
load i8, i8* %p, align 1 @@ -231,6 +248,7 @@ ; CHECK-LABEL: rotate_right_m8: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: rorb %cl, (%rdi) ; CHECK-NEXT: retq %x = load i8, i8* %p, align 1 @@ -249,6 +267,7 @@ ; CHECK-LABEL: rotate_left_m16: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: rolw %cl, (%rdi) ; CHECK-NEXT: retq %x = load i16, i16* %p, align 1 @@ -267,6 +286,7 @@ ; CHECK-LABEL: rotate_right_m16: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: %CL %CL %ECX ; CHECK-NEXT: rorw %cl, (%rdi) ; CHECK-NEXT: retq %x = load i16, i16* %p, align 1 Index: test/CodeGen/X86/sad.ll =================================================================== --- test/CodeGen/X86/sad.ll +++ test/CodeGen/X86/sad.ll @@ -600,12 +600,12 @@ ; SSE2-NEXT: paddd %xmm1, %xmm15 ; SSE2-NEXT: pxor %xmm1, %xmm15 ; SSE2-NEXT: paddd %xmm15, %xmm2 -; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm4 # 16-byte Reload -; SSE2-NEXT: movdqa %xmm4, %xmm1 -; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: paddd %xmm1, %xmm4 -; SSE2-NEXT: pxor %xmm1, %xmm4 -; SSE2-NEXT: paddd %xmm4, %xmm6 +; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: movdqa %xmm1, %xmm4 +; SSE2-NEXT: psrad $31, %xmm4 +; SSE2-NEXT: paddd %xmm4, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: paddd %xmm1, %xmm6 ; SSE2-NEXT: movdqa %xmm6, %xmm15 ; SSE2-NEXT: movdqa %xmm10, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 @@ -614,12 +614,12 @@ ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload ; SSE2-NEXT: paddd %xmm10, %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm10 -; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload -; SSE2-NEXT: movdqa %xmm6, %xmm1 -; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: paddd %xmm1, %xmm6 -; SSE2-NEXT: pxor %xmm1, %xmm6 -; SSE2-NEXT: paddd %xmm6, %xmm3 +; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: movdqa %xmm1, %xmm6 +; SSE2-NEXT: psrad $31, %xmm6 
+; SSE2-NEXT: paddd %xmm6, %xmm1 +; SSE2-NEXT: pxor %xmm6, %xmm1 +; SSE2-NEXT: paddd %xmm1, %xmm3 ; SSE2-NEXT: movdqa %xmm12, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 ; SSE2-NEXT: paddd %xmm1, %xmm12 Index: test/CodeGen/X86/sar_fold64.ll =================================================================== --- test/CodeGen/X86/sar_fold64.ll +++ test/CodeGen/X86/sar_fold64.ll @@ -56,9 +56,10 @@ define i8 @all_sign_bit_ashr(i8 %x) { ; CHECK-LABEL: all_sign_bit_ashr: ; CHECK: # BB#0: -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: negb %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: negb %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %and = and i8 %x, 1 %neg = sub i8 0, %and Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -15,7 +15,6 @@ ; CHECK-NEXT: cmovneq %rdi, %rsi ; CHECK-NEXT: movl (%rsi), %eax ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test1: ; MCU: # BB#0: @@ -45,7 +44,7 @@ ; GENERIC-NEXT: callq _return_false ; GENERIC-NEXT: xorl %ecx, %ecx ; GENERIC-NEXT: testb $1, %al -; GENERIC-NEXT: movl $-480, %eax +; GENERIC-NEXT: movl $-480, %eax ## imm = 0xFE20 ; GENERIC-NEXT: cmovnel %ecx, %eax ; GENERIC-NEXT: shll $3, %eax ; GENERIC-NEXT: cmpl $32768, %eax ## imm = 0x8000 @@ -55,14 +54,13 @@ ; GENERIC-NEXT: popq %rcx ; GENERIC-NEXT: retq ; GENERIC-NEXT: LBB1_1: ## %bb90 -; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test2: ; ATOM: ## BB#0: ## %entry ; ATOM-NEXT: pushq %rax ; ATOM-NEXT: callq _return_false ; ATOM-NEXT: xorl %ecx, %ecx -; ATOM-NEXT: movl $-480, %edx +; ATOM-NEXT: movl $-480, %edx ## imm = 0xFE20 ; ATOM-NEXT: testb $1, %al ; ATOM-NEXT: cmovnel %ecx, %edx ; ATOM-NEXT: shll $3, %edx @@ -73,17 +71,16 @@ ; ATOM-NEXT: popq %rcx ; ATOM-NEXT: retq ; ATOM-NEXT: LBB1_1: ## %bb90 -; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test2: ; MCU: # BB#0: # %entry ; MCU-NEXT: calll 
return_false -; MCU-NEXT: xorl %ecx, %ecx +; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: testb $1, %al ; MCU-NEXT: jne .LBB1_2 ; MCU-NEXT: # BB#1: # %entry ; MCU-NEXT: movl $-480, %ecx # imm = 0xFE20 -; MCU-NEXT: .LBB1_2: +; MCU-NEXT: .LBB1_2: # %entry ; MCU-NEXT: shll $3, %ecx ; MCU-NEXT: cmpl $32768, %ecx # imm = 0x8000 ; MCU-NEXT: jge .LBB1_3 @@ -116,7 +113,6 @@ ; CHECK-NEXT: leaq {{.*}}(%rip), %rcx ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test3: ; MCU: # BB#0: # %entry @@ -140,7 +136,6 @@ ; CHECK-NEXT: seta %al ; CHECK-NEXT: movsbl (%rdi,%rax,4), %eax ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test4: ; MCU: # BB#0: # %entry @@ -175,7 +170,6 @@ ; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; CHECK-NEXT: movd %xmm0, (%rsi) ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test5: ; MCU: # BB#0: @@ -211,7 +205,6 @@ ; CHECK-NEXT: mulps %xmm0, %xmm0 ; CHECK-NEXT: movaps %xmm0, (%rsi) ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test6: ; MCU: # BB#0: @@ -283,7 +276,6 @@ ; CHECK-NEXT: leaq {{.*}}(%rip), %rcx ; CHECK-NEXT: fldt (%rax,%rcx) ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test7: ; MCU: # BB#0: @@ -333,7 +325,6 @@ ; GENERIC-NEXT: movq %xmm1, 16(%rsi) ; GENERIC-NEXT: movdqa %xmm0, (%rsi) ; GENERIC-NEXT: retq -; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test8: ; ATOM: ## BB#0: @@ -366,7 +357,6 @@ ; ATOM-NEXT: movdqa %xmm0, (%rsi) ; ATOM-NEXT: movq %xmm1, 16(%rsi) ; ATOM-NEXT: retq -; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test8: ; MCU: # BB#0: @@ -456,7 +446,6 @@ ; GENERIC-NEXT: sbbq %rax, %rax ; GENERIC-NEXT: orq %rsi, %rax ; GENERIC-NEXT: retq -; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test9: ; ATOM: ## BB#0: @@ -466,7 +455,6 @@ ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq -; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test9: ; MCU: # BB#0: @@ -493,7 
+481,6 @@ ; GENERIC-NEXT: sbbq %rax, %rax ; GENERIC-NEXT: orq %rsi, %rax ; GENERIC-NEXT: retq -; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test9a: ; ATOM: ## BB#0: @@ -503,7 +490,6 @@ ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq -; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test9a: ; MCU: # BB#0: @@ -528,7 +514,6 @@ ; GENERIC-NEXT: sbbq %rax, %rax ; GENERIC-NEXT: orq %rsi, %rax ; GENERIC-NEXT: retq -; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test9b: ; ATOM: ## BB#0: @@ -538,7 +523,6 @@ ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq -; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test9b: ; MCU: # BB#0: @@ -566,7 +550,6 @@ ; CHECK-NEXT: setne %al ; CHECK-NEXT: leaq -1(%rax,%rax), %rax ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test10: ; MCU: # BB#0: @@ -592,7 +575,6 @@ ; CHECK-NEXT: notq %rax ; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test11: ; MCU: # BB#0: @@ -619,7 +601,6 @@ ; CHECK-NEXT: notq %rax ; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test11a: ; MCU: # BB#0: @@ -641,25 +622,14 @@ declare noalias i8* @_Znam(i64) noredzone define noalias i8* @test12(i64 %count) nounwind ssp noredzone { -; GENERIC-LABEL: test12: -; GENERIC: ## BB#0: ## %entry -; GENERIC-NEXT: movl $4, %ecx -; GENERIC-NEXT: movq %rdi, %rax -; GENERIC-NEXT: mulq %rcx -; GENERIC-NEXT: movq $-1, %rdi -; GENERIC-NEXT: cmovnoq %rax, %rdi -; GENERIC-NEXT: jmp __Znam ## TAILCALL -; GENERIC-NEXT: ## -- End function -; -; ATOM-LABEL: test12: -; ATOM: ## BB#0: ## %entry -; ATOM-NEXT: movq %rdi, %rax -; ATOM-NEXT: movl $4, %ecx -; ATOM-NEXT: mulq %rcx -; ATOM-NEXT: movq $-1, %rdi -; ATOM-NEXT: cmovnoq %rax, %rdi -; ATOM-NEXT: jmp __Znam ## TAILCALL -; ATOM-NEXT: ## -- End function +; CHECK-LABEL: test12: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movl $4, %ecx +; CHECK-NEXT: mulq %rcx +; CHECK-NEXT: movq $-1, %rdi +; 
CHECK-NEXT: cmovnoq %rax, %rdi +; CHECK-NEXT: jmp __Znam ## TAILCALL ; ; MCU-LABEL: test12: ; MCU: # BB#0: # %entry @@ -710,7 +680,6 @@ ; GENERIC-NEXT: cmpl %esi, %edi ; GENERIC-NEXT: sbbl %eax, %eax ; GENERIC-NEXT: retq -; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test13: ; ATOM: ## BB#0: @@ -721,7 +690,6 @@ ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq -; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test13: ; MCU: # BB#0: @@ -741,7 +709,6 @@ ; CHECK-NEXT: setae %al ; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test14: ; MCU: # BB#0: @@ -763,7 +730,6 @@ ; GENERIC-NEXT: negl %edi ; GENERIC-NEXT: sbbl %eax, %eax ; GENERIC-NEXT: retq -; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test15: ; ATOM: ## BB#0: ## %entry @@ -774,7 +740,6 @@ ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq -; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test15: ; MCU: # BB#0: # %entry @@ -826,7 +791,6 @@ ; GENERIC-NEXT: sbbl %eax, %eax ; GENERIC-NEXT: ## kill: %AX %AX %EAX ; GENERIC-NEXT: retq -; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test17: ; ATOM: ## BB#0: ## %entry @@ -838,7 +802,6 @@ ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq -; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test17: ; MCU: # BB#0: # %entry @@ -855,21 +818,21 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind { ; GENERIC-LABEL: test18: ; GENERIC: ## BB#0: -; GENERIC-NEXT: cmpl $15, %edi -; GENERIC-NEXT: cmovgel %edx, %esi ; GENERIC-NEXT: movl %esi, %eax +; GENERIC-NEXT: cmpl $15, %edi +; GENERIC-NEXT: cmovgel %edx, %eax +; GENERIC-NEXT: ## kill: %AL %AL %EAX ; GENERIC-NEXT: retq -; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test18: ; ATOM: ## BB#0: -; ATOM-NEXT: cmpl $15, %edi -; ATOM-NEXT: cmovgel %edx, %esi ; ATOM-NEXT: movl %esi, %eax +; ATOM-NEXT: cmpl $15, %edi +; ATOM-NEXT: cmovgel %edx, %eax +; ATOM-NEXT: ## kill: %AL %AL %EAX ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq -; ATOM-NEXT: ## -- End 
function ; ; MCU-LABEL: test18: ; MCU: # BB#0: @@ -888,16 +851,18 @@ define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) { ; CHECK-LABEL: trunc_select_miscompile: ; CHECK: ## BB#0: -; CHECK-NEXT: orb $2, %sil ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orb $2, %cl +; CHECK-NEXT: ## kill: %CL %CL %ECX +; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: trunc_select_miscompile: ; MCU: # BB#0: -; MCU-NEXT: orb $2, %dl ; MCU-NEXT: movl %edx, %ecx +; MCU-NEXT: orb $2, %cl +; MCU-NEXT: # kill: %CL %CL %ECX ; MCU-NEXT: shll %cl, %eax ; MCU-NEXT: retl %tmp1 = select i1 %cc, i32 3, i32 2 @@ -1091,10 +1056,11 @@ ; CHECK-LABEL: select_xor_1: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl $43, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorl $43, %ecx ; CHECK-NEXT: testb $1, %sil -; CHECK-NEXT: cmovnew %ax, %di -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmovnew %cx, %ax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ; ; MCU-LABEL: select_xor_1: @@ -1116,10 +1082,10 @@ define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) { ; CHECK-LABEL: select_xor_2: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: xorl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: xorl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_xor_2: @@ -1140,10 +1106,10 @@ define i32 @select_or(i32 %A, i32 %B, i8 %cond) { ; CHECK-LABEL: select_or: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_or: @@ -1164,10 +1130,10 @@ define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) { ; CHECK-LABEL: select_or_1: ; CHECK: ## BB#0: ## %entry -; 
CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_or_1: Index: test/CodeGen/X86/select_const.ll =================================================================== --- test/CodeGen/X86/select_const.ll +++ test/CodeGen/X86/select_const.ll @@ -43,8 +43,8 @@ define i32 @select_1_or_0(i1 %cond) { ; CHECK-LABEL: select_1_or_0: ; CHECK: # BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -62,8 +62,8 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { ; CHECK-LABEL: select_1_or_0_signext: ; CHECK: # BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -95,8 +95,8 @@ define i32 @select_0_or_neg1_signext(i1 signext %cond) { ; CHECK-LABEL: select_0_or_neg1_signext: ; CHECK: # BB#0: -; CHECK-NEXT: notl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -107,9 +107,9 @@ define i32 @select_neg1_or_0(i1 %cond) { ; CHECK-LABEL: select_neg1_or_0: ; CHECK: # BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: negl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -118,8 +118,8 @@ define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_neg1_or_0_zeroext: ; CHECK: # BB#0: -; CHECK-NEXT: negl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -329,9 +329,10 @@ define i8 @select_pow2_diff(i1 zeroext %cond) { ; CHECK-LABEL: select_pow2_diff: ; CHECK: # BB#0: -; 
CHECK-NEXT: shlb $4, %dil -; CHECK-NEXT: orb $3, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $4, %al +; CHECK-NEXT: orb $3, %al +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %sel = select i1 %cond, i8 19, i8 3 ret i8 %sel Index: test/CodeGen/X86/setcc-logic.ll =================================================================== --- test/CodeGen/X86/setcc-logic.ll +++ test/CodeGen/X86/setcc-logic.ll @@ -41,9 +41,10 @@ define zeroext i1 @all_sign_bits_set(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_sign_bits_set: ; CHECK: # BB#0: -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %a = icmp slt i32 %P, 0 %b = icmp slt i32 %Q, 0 @@ -66,9 +67,10 @@ define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: any_sign_bits_set: ; CHECK: # BB#0: -; CHECK-NEXT: orl %esi, %edi -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq %a = icmp slt i32 %P, 0 %b = icmp slt i32 %Q, 0 Index: test/CodeGen/X86/sext-i1.ll =================================================================== --- test/CodeGen/X86/sext-i1.ll +++ test/CodeGen/X86/sext-i1.ll @@ -165,8 +165,8 @@ ; ; X64-LABEL: select_0_or_1s_signext: ; X64: # BB#0: -; X64-NEXT: notl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: notl %eax ; X64-NEXT: retq %not = xor i1 %cond, 1 %sext = sext i1 %not to i32 Index: test/CodeGen/X86/shift-and.ll =================================================================== --- test/CodeGen/X86/shift-and.ll +++ test/CodeGen/X86/shift-and.ll @@ -12,9 +12,10 @@ ; ; X64-LABEL: t1: ; X64: # BB#0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shll %cl, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq 
%shamt = and i32 %t, 31 %res = shl i32 %val, %shamt @@ -31,9 +32,10 @@ ; ; X64-LABEL: t2: ; X64: # BB#0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shll %cl, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: %CL %CL %ECX +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq %shamt = and i32 %t, 63 %res = shl i32 %val, %shamt @@ -52,6 +54,7 @@ ; X64-LABEL: t3: ; X64: # BB#0: ; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: %CL %CL %ECX ; X64-NEXT: sarw %cl, {{.*}}(%rip) ; X64-NEXT: retq %shamt = and i16 %t, 31 @@ -82,9 +85,10 @@ ; ; X64-LABEL: t4: ; X64: # BB#0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrq %cl, %rsi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: %CL %CL %RCX +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shamt = and i64 %t, 63 %res = lshr i64 %val, %shamt @@ -112,9 +116,10 @@ ; ; X64-LABEL: t5: ; X64: # BB#0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrq %cl, %rsi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: %CL %CL %RCX +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shamt = and i64 %t, 191 %res = lshr i64 %val, %shamt @@ -147,7 +152,8 @@ ; ; X64-LABEL: t5ptr: ; X64: # BB#0: -; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: %CL %CL %RCX ; X64-NEXT: shrq %cl, (%rsi) ; X64-NEXT: retq %shamt = and i64 %t, 191 @@ -205,9 +211,9 @@ ; ; X64-LABEL: big_mask_constant: ; X64: # BB#0: -; X64-NEXT: shrq $7, %rdi -; X64-NEXT: andl $134217728, %edi # imm = 0x8000000 ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $7, %rax +; X64-NEXT: andl $134217728, %eax # imm = 0x8000000 ; X64-NEXT: retq %and = and i64 %x, 17179869184 ; 0x400000000 %sh = lshr i64 %and, 7 Index: test/CodeGen/X86/shift-bmi2.ll =================================================================== --- test/CodeGen/X86/shift-bmi2.ll +++ test/CodeGen/X86/shift-bmi2.ll @@ -26,8 +26,8 @@ ; ; BMI264-LABEL: shl32i: ; BMI264: # BB#0: -; BMI264-NEXT: shll $5, %edi ; BMI264-NEXT: movl %edi, %eax 
+; BMI264-NEXT: shll $5, %eax ; BMI264-NEXT: retq %shl = shl i32 %x, 5 ret i32 %shl @@ -69,6 +69,23 @@ } define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: shl64: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl %cl, %eax, %edx +; BMI2-NEXT: shlxl %ecx, %eax, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64: ; BMI264: # BB#0: ; BMI264-NEXT: shlxq %rsi, %rdi, %rax @@ -78,16 +95,42 @@ } define i64 @shl64i(i64 %x) nounwind uwtable readnone { +; BMI2-LABEL: shl64i: +; BMI2: # BB#0: +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl $7, %eax, %edx +; BMI2-NEXT: shll $7, %eax +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64i: ; BMI264: # BB#0: -; BMI264-NEXT: shlq $7, %rdi ; BMI264-NEXT: movq %rdi, %rax +; BMI264-NEXT: shlq $7, %rax ; BMI264-NEXT: retq %shl = shl i64 %x, 7 ret i64 %shl } define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: shl64p: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl (%eax), %esi +; BMI2-NEXT: movl 4(%eax), %edx +; BMI2-NEXT: shldl %cl, %esi, %edx +; BMI2-NEXT: shlxl %ecx, %esi, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64p: ; BMI264: # BB#0: ; BMI264-NEXT: shlxq %rsi, (%rdi), %rax @@ -98,6 +141,15 @@ } define i64 @shl64pi(i64* %p) nounwind uwtable readnone { +; BMI2-LABEL: 
shl64pi: +; BMI2: # BB#0: +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movl (%ecx), %eax +; BMI2-NEXT: movl 4(%ecx), %edx +; BMI2-NEXT: shldl $7, %eax, %edx +; BMI2-NEXT: shll $7, %eax +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64pi: ; BMI264: # BB#0: ; BMI264-NEXT: movq (%rdi), %rax @@ -141,6 +193,23 @@ } define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: lshr64: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64: ; BMI264: # BB#0: ; BMI264-NEXT: shrxq %rsi, %rdi, %rax @@ -150,6 +219,24 @@ } define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: lshr64p: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: movl (%edx), %eax +; BMI2-NEXT: movl 4(%edx), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64p: ; BMI264: # BB#0: ; BMI264-NEXT: shrxq %rsi, (%rdi), %rax @@ -192,6 +279,23 @@ } define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: ashr64: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl 
{{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64: ; BMI264: # BB#0: ; BMI264-NEXT: sarxq %rsi, %rdi, %rax @@ -201,6 +305,24 @@ } define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: ashr64p: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: movl (%edx), %eax +; BMI2-NEXT: movl 4(%edx), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64p: ; BMI264: # BB#0: ; BMI264-NEXT: sarxq %rsi, (%rdi), %rax @@ -227,6 +349,21 @@ } define i64 @shl64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: shl64and: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl %cl, %eax, %edx +; BMI2-NEXT: shlxl %ecx, %eax, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64and: ; BMI264: # BB#0: ; BMI264-NEXT: shlxq %rdi, %rsi, %rax @@ -253,6 +390,21 @@ } define i64 @lshr64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: lshr64and: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl 
%edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64and: ; BMI264: # BB#0: ; BMI264-NEXT: shrxq %rdi, %rsi, %rax @@ -279,6 +431,21 @@ } define i64 @ashr64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: ashr64and: +; BMI2: # BB#0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64and: ; BMI264: # BB#0: ; BMI264-NEXT: sarxq %rdi, %rsi, %rax Index: test/CodeGen/X86/shift-double-x86_64.ll =================================================================== --- test/CodeGen/X86/shift-double-x86_64.ll +++ test/CodeGen/X86/shift-double-x86_64.ll @@ -6,10 +6,11 @@ define i64 @test1(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test1: ; CHECK: # BB#0: -; CHECK-NEXT: andl $63, %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: andl $63, %ecx +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %and = and i64 %bits, 63 %and64 = sub i64 64, %and @@ -22,10 +23,11 @@ define i64 @test2(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test2: ; CHECK: # BB#0: -; CHECK-NEXT: andl $63, %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rdi, %rsi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: andl $63, %ecx +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shrdq %cl, %rdi, %rax ; CHECK-NEXT: retq %and = and i64 %bits, 63 %and64 = sub i64 64, %and @@ -38,9 +40,10 @@ define i64 @test3(i64 %hi, i64 %lo, i64 %bits) nounwind 
{ ; CHECK-LABEL: test3: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = sub i64 64, %bits %sh_lo = lshr i64 %lo, %bits64 @@ -52,9 +55,10 @@ define i64 @test4(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test4: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rdi, %rsi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shrdq %cl, %rdi, %rax ; CHECK-NEXT: retq %bits64 = sub i64 64, %bits %sh_lo = shl i64 %hi, %bits64 @@ -66,9 +70,10 @@ define i64 @test5(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test5: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = lshr i64 %lo, 1 @@ -81,9 +86,10 @@ define i64 @test6(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test6: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shrdq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = shl i64 %lo, 1 @@ -96,9 +102,10 @@ define i64 @test7(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test7: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: %CL %CL %RCX +; CHECK-NEXT: shrdq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = add i64 %lo, %lo Index: test/CodeGen/X86/sret-implicit.ll =================================================================== --- 
test/CodeGen/X86/sret-implicit.ll +++ test/CodeGen/X86/sret-implicit.ll @@ -10,7 +10,7 @@ } ; X64-LABEL: sret_void -; X64-DAG: movl $0, (%rdi) +; X64-DAG: movl $0, (%rax) ; X64-DAG: movq %rdi, %rax ; X64: retq @@ -24,7 +24,7 @@ } ; X64-LABEL: sret_demoted -; X64-DAG: movq $0, (%rdi) +; X64-DAG: movq $0, (%rax) ; X64-DAG: movq %rdi, %rax ; X64: retq Index: test/CodeGen/X86/sse1.ll =================================================================== --- test/CodeGen/X86/sse1.ll +++ test/CodeGen/X86/sse1.ll @@ -192,9 +192,10 @@ ; ; X64-LABEL: PR30512: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edi, %edi ; X64-NEXT: cmpl %r9d, %esi -; X64-NEXT: sete %al +; X64-NEXT: sete %dil ; X64-NEXT: xorl %esi, %esi ; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %edx ; X64-NEXT: sete %sil @@ -204,11 +205,10 @@ ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %r8d ; X64-NEXT: sete %cl -; X64-NEXT: movl %ecx, 12(%rdi) -; X64-NEXT: movl %edx, 8(%rdi) -; X64-NEXT: movl %esi, 4(%rdi) -; X64-NEXT: movl %eax, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %ecx, 12(%rax) +; X64-NEXT: movl %edx, 8(%rax) +; X64-NEXT: movl %esi, 4(%rax) +; X64-NEXT: movl %edi, (%rax) ; X64-NEXT: retq %cmp = icmp eq <4 x i32> %x, %y %zext = zext <4 x i1> %cmp to <4 x i32> Index: test/CodeGen/X86/sse3-schedule.ll =================================================================== --- test/CodeGen/X86/sse3-schedule.ll +++ test/CodeGen/X86/sse3-schedule.ll @@ -431,8 +431,8 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; GENERIC-LABEL: test_monitor: ; GENERIC: # BB#0: -; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: monitor # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -445,50 +445,50 @@ ; ; SLM-LABEL: test_monitor: ; SLM: # BB#0: -; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] ; SLM-NEXT: movl %esi, %ecx # 
sched: [1:0.50] +; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] ; SLM-NEXT: monitor # sched: [100:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_monitor: ; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SANDY-NEXT: movl %esi, %ecx # sched: [1:0.33] +; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SANDY-NEXT: monitor # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_monitor: ; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] +; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: monitor # sched: [100:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: test_monitor: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: monitor # sched: [100:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_monitor: ; SKX: # BB#0: -; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-NEXT: monitor # sched: [100:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_monitor: ; BTVER2: # BB#0: -; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.17] +; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BTVER2-NEXT: monitor # sched: [100:0.17] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_monitor: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25] +; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: monitor # sched: [100:?] 
; ZNVER1-NEXT: retq # sched: [1:0.50] tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2) @@ -712,64 +712,64 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; GENERIC-LABEL: test_mwait: ; GENERIC: # BB#0: -; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] ; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] ; GENERIC-NEXT: mwait # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_mwait: ; ATOM: # BB#0: -; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50] +; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; ATOM-NEXT: mwait # sched: [46:23.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_mwait: ; SLM: # BB#0: -; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; SLM-NEXT: movl %esi, %eax # sched: [1:0.50] +; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; SLM-NEXT: mwait # sched: [100:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_mwait: ; SANDY: # BB#0: -; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] ; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33] +; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] ; SANDY-NEXT: mwait # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mwait: ; HASWELL: # BB#0: -; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25] +; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; HASWELL-NEXT: mwait # sched: [20:2.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: test_mwait: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-NEXT: mwait # sched: [20:2.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mwait: ; SKX: # BB#0: -; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-NEXT: 
mwait # sched: [20:2.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mwait: ; BTVER2: # BB#0: -; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.17] ; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.17] +; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.17] ; BTVER2-NEXT: mwait # sched: [100:0.17] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_mwait: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-NEXT: mwait # sched: [100:?] ; ZNVER1-NEXT: retq # sched: [1:0.50] tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1) Index: test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll @@ -17,8 +17,8 @@ define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{ ; X64-LABEL: test_mm_crc64_u64: ; X64: # BB#0: -; X64-NEXT: crc32q %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: crc32q %rsi, %rax ; X64-NEXT: retq %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) ret i64 %res Index: test/CodeGen/X86/sse42-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-fast-isel.ll +++ test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -19,12 +19,12 @@ ; ; X64-LABEL: test_mm_cmpestra: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: seta %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: seta %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -48,12 +48,12 @@ ; ; X64-LABEL: test_mm_cmpestrc: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d -; 
X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: setb %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: setb %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -73,8 +73,8 @@ ; ; X64-LABEL: test_mm_cmpestri: ; X64: # BB#0: -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: retq @@ -95,8 +95,8 @@ ; ; X64-LABEL: test_mm_cmpestrm: ; X64: # BB#0: -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestrm $7, %xmm1, %xmm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> @@ -122,12 +122,12 @@ ; ; X64-LABEL: test_mm_cmpestro: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: seto %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: seto %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -151,12 +151,12 @@ ; ; X64-LABEL: test_mm_cmpestrs: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: sets %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: sets %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -180,12 +180,12 @@ ; ; X64-LABEL: test_mm_cmpestrz: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: 
pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: sete %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: sete %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -361,8 +361,8 @@ ; ; X64-LABEL: test_mm_crc32_u8: ; X64: # BB#0: -; X64-NEXT: crc32b %sil, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32b %sil, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) ret i32 %res @@ -379,8 +379,8 @@ ; ; X64-LABEL: test_mm_crc32_u16: ; X64: # BB#0: -; X64-NEXT: crc32w %si, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32w %si, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) ret i32 %res @@ -396,8 +396,8 @@ ; ; X64-LABEL: test_mm_crc32_u32: ; X64: # BB#0: -; X64-NEXT: crc32l %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32l %esi, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) ret i32 %res Index: test/CodeGen/X86/sse42-intrinsics-x86_64.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-x86_64.ll +++ test/CodeGen/X86/sse42-intrinsics-x86_64.ll @@ -9,8 +9,8 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind { ; CHECK-LABEL: crc32_64_8: ; CHECK: ## BB#0: -; CHECK-NEXT: crc32b %sil, %edi ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe] ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b) ret i64 %tmp @@ -19,8 +19,8 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: crc32_64_64: ; CHECK: ## BB#0: -; CHECK-NEXT: crc32q %rsi, %rdi ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xfe] ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6] ; CHECK-NEXT: retq ## encoding: 
[0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) ret i64 %tmp Index: test/CodeGen/X86/sse42-schedule.ll =================================================================== --- test/CodeGen/X86/sse42-schedule.ll +++ test/CodeGen/X86/sse42-schedule.ll @@ -12,58 +12,58 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; GENERIC-LABEL: crc32_32_8: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_8: ; SLM: # BB#0: -; SLM-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SLM-NEXT: crc32b (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_32_8: ; SANDY: # BB#0: -; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_8: ; HASWELL: # BB#0: -; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: crc32_32_8: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32b 
(%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_8: ; SKX: # BB#0: -; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_8: ; BTVER2: # BB#0: -; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.17] +; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_32_8: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) %2 = load i8, i8 *%a2 @@ -75,58 +75,58 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; GENERIC-LABEL: crc32_32_16: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32w %si, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32w %si, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32w (%rdx), %eax # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_16: ; SLM: # BB#0: -; SLM-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SLM-NEXT: crc32w (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SLM-NEXT: crc32w (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_32_16: ; SANDY: # BB#0: -; SANDY-NEXT: crc32w %si, %edi # 
sched: [3:1.00] -; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32w (%rdx), %eax # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_16: ; HASWELL: # BB#0: -; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32w %si, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32w (%rdx), %eax # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: crc32_32_16: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_16: ; SKX: # BB#0: -; SKX-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SKX-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SKX-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_16: ; BTVER2: # BB#0: -; BTVER2-NEXT: crc32w %si, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.17] +; BTVER2-NEXT: crc32w %si, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_32_16: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: crc32w %si, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32w (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32w %si, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32w (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 
@llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) %2 = load i16, i16 *%a2 @@ -138,58 +138,58 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: crc32_32_32: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32l (%rdx), %eax # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_32: ; SLM: # BB#0: -; SLM-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SLM-NEXT: crc32l (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SLM-NEXT: crc32l (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_32_32: ; SANDY: # BB#0: -; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32l (%rdx), %eax # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_32: ; HASWELL: # BB#0: -; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32l (%rdx), %eax # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: crc32_32_32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_32: ; SKX: # BB#0: -; SKX-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SKX-NEXT: crc32l (%rdx), 
%edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SKX-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_32: ; BTVER2: # BB#0: -; BTVER2-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.17] +; BTVER2-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_32_32: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32l (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32l (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) %2 = load i32, i32 *%a2 @@ -201,58 +201,58 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; GENERIC-LABEL: crc32_64_8: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_64_8: ; SLM: # BB#0: -; SLM-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SLM-NEXT: crc32b (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_64_8: ; SANDY: # BB#0: -; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; 
SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_64_8: ; HASWELL: # BB#0: -; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: crc32_64_8: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_64_8: ; SKX: # BB#0: -; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_64_8: ; BTVER2: # BB#0: -; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.17] +; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_64_8: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1) %2 = load i8, i8 *%a2 @@ -264,58 +264,58 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: 
crc32_64_64: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; GENERIC-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_64_64: ; SLM: # BB#0: -; SLM-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SLM-NEXT: crc32q (%rdx), %rdi # sched: [6:1.00] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SLM-NEXT: crc32q (%rdx), %rax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_64_64: ; SANDY: # BB#0: -; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SANDY-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_64_64: ; HASWELL: # BB#0: -; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; HASWELL-NEXT: crc32q (%rdx), %rax # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: crc32_64_64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_64_64: ; SKX: # BB#0: -; SKX-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SKX-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SKX-NEXT: crc32q (%rdx), %rax # sched: 
[8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_64_64: ; BTVER2: # BB#0: -; BTVER2-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; BTVER2-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.17] +; BTVER2-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; BTVER2-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_64_64: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; ZNVER1-NEXT: crc32q (%rdx), %rdi # sched: [10:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; ZNVER1-NEXT: crc32q (%rdx), %rax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) %2 = load i64, i64 *%a2 Index: test/CodeGen/X86/subcarry.ll =================================================================== --- test/CodeGen/X86/subcarry.ll +++ test/CodeGen/X86/subcarry.ll @@ -6,23 +6,23 @@ define %S @negate(%S* nocapture readonly %this) { ; CHECK-LABEL: negate: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movq (%rsi), %rax -; CHECK-NEXT: movq 8(%rsi), %rcx -; CHECK-NEXT: notq %rax -; CHECK-NEXT: addq $1, %rax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq (%rsi), %rcx +; CHECK-NEXT: movq 8(%rsi), %rdx ; CHECK-NEXT: notq %rcx -; CHECK-NEXT: adcq $0, %rcx -; CHECK-NEXT: movq 16(%rsi), %rdx +; CHECK-NEXT: addq $1, %rcx ; CHECK-NEXT: notq %rdx ; CHECK-NEXT: adcq $0, %rdx +; CHECK-NEXT: movq 16(%rsi), %rdi +; CHECK-NEXT: notq %rdi +; CHECK-NEXT: adcq $0, %rdi ; CHECK-NEXT: movq 24(%rsi), %rsi ; CHECK-NEXT: notq %rsi ; CHECK-NEXT: adcq $0, %rsi -; CHECK-NEXT: movq %rax, (%rdi) -; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %rdx, 16(%rdi) -; CHECK-NEXT: movq %rsi, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rcx, (%rax) +; CHECK-NEXT: movq %rdx, 8(%rax) +; CHECK-NEXT: movq %rdi, 16(%rax) +; CHECK-NEXT: movq %rsi, 24(%rax) ; CHECK-NEXT: retq 
entry: %0 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 0 @@ -63,29 +63,29 @@ define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr { ; CHECK-LABEL: sub: ; CHECK: # BB#0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: notq %rdx -; CHECK-NEXT: xorl %r10d, %r10d +; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: setb %r10b +; CHECK-NEXT: setb %dil ; CHECK-NEXT: addq $1, %rdx -; CHECK-NEXT: adcq 8(%rsi), %r10 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %r11d +; CHECK-NEXT: adcq 8(%rsi), %rdi +; CHECK-NEXT: setb %r10b +; CHECK-NEXT: movzbl %r10b, %r10d ; CHECK-NEXT: notq %rcx -; CHECK-NEXT: addq %r10, %rcx -; CHECK-NEXT: adcq 16(%rsi), %r11 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: adcq 16(%rsi), %r10 +; CHECK-NEXT: setb %dil +; CHECK-NEXT: movzbl %dil, %edi ; CHECK-NEXT: notq %r8 -; CHECK-NEXT: addq %r11, %r8 -; CHECK-NEXT: adcq 24(%rsi), %rax +; CHECK-NEXT: addq %r10, %r8 +; CHECK-NEXT: adcq 24(%rsi), %rdi ; CHECK-NEXT: notq %r9 -; CHECK-NEXT: addq %rax, %r9 -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %r8, 16(%rdi) -; CHECK-NEXT: movq %r9, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: addq %rdi, %r9 +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %rcx, 8(%rax) +; CHECK-NEXT: movq %r8, 16(%rax) +; CHECK-NEXT: movq %r9, 24(%rax) ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 Index: test/CodeGen/X86/swift-return.ll =================================================================== --- test/CodeGen/X86/swift-return.ll +++ test/CodeGen/X86/swift-return.ll @@ -266,9 +266,9 @@ ; CHECK-LABEL: gen7 ; CHECK: movl %edi, %eax -; CHECK: movl %edi, %edx -; CHECK: movl %edi, %ecx -; CHECK: movl %edi, %r8d +; CHECK: movl %eax, %edx +; CHECK: movl %eax, %ecx +; CHECK: movl %eax, %r8d ; CHECK: retq define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) { %v0 = insertvalue { i32, i32, 
i32, i32 } undef, i32 %key, 0 @@ -280,9 +280,9 @@ ; CHECK-LABEL: gen8 ; CHECK: movq %rdi, %rax -; CHECK: movq %rdi, %rdx -; CHECK: movq %rdi, %rcx -; CHECK: movq %rdi, %r8 +; CHECK: movq %rax, %rdx +; CHECK: movq %rax, %rcx +; CHECK: movq %rax, %r8 ; CHECK: retq define swiftcc { i64, i64, i64, i64 } @gen8(i64 %key) { %v0 = insertvalue { i64, i64, i64, i64 } undef, i64 %key, 0 @@ -294,9 +294,9 @@ ; CHECK-LABEL: gen9 ; CHECK: movl %edi, %eax -; CHECK: movl %edi, %edx -; CHECK: movl %edi, %ecx -; CHECK: movl %edi, %r8d +; CHECK: movl %eax, %edx +; CHECK: movl %eax, %ecx +; CHECK: movl %eax, %r8d ; CHECK: retq define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) { %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0 @@ -306,13 +306,13 @@ ret { i8, i8, i8, i8 } %v3 } ; CHECK-LABEL: gen10 +; CHECK: movq %rdi, %rax ; CHECK: movaps %xmm0, %xmm1 ; CHECK: movaps %xmm0, %xmm2 ; CHECK: movaps %xmm0, %xmm3 -; CHECK: movq %rdi, %rax -; CHECK: movq %rdi, %rdx -; CHECK: movq %rdi, %rcx -; CHECK: movq %rdi, %r8 +; CHECK: movq %rax, %rdx +; CHECK: movq %rax, %rcx +; CHECK: movq %rax, %r8 ; CHECK: retq define swiftcc { double, double, double, double, i64, i64, i64, i64 } @gen10(double %keyd, i64 %keyi) { %v0 = insertvalue { double, double, double, double, i64, i64, i64, i64 } undef, double %keyd, 0 Index: test/CodeGen/X86/swifterror.ll =================================================================== --- test/CodeGen/X86/swifterror.ll +++ test/CodeGen/X86/swifterror.ll @@ -34,11 +34,11 @@ ; CHECK-APPLE-LABEL: caller: ; CHECK-APPLE: xorl %r12d, %r12d ; CHECK-APPLE: callq {{.*}}foo -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller: @@ -247,12 +247,12 @@ ; CHECK-APPLE: movl $1, %esi ; CHECK-APPLE: xorl %r12d, %r12d ; 
CHECK-APPLE: callq {{.*}}foo_sret -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12), +; CHECK-APPLE: movb 8(%rdi), ; CHECK-APPLE: movb %{{.*}}, -; CHECK-APPLE: movq %r12, %rdi ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller3: @@ -297,21 +297,21 @@ ; The first swifterror value: ; CHECK-APPLE: xorl %r12d, %r12d ; CHECK-APPLE: callq {{.*}}foo -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) ; CHECK-APPLE: callq {{.*}}free ; The second swifterror value: ; CHECK-APPLE: xorl %r12d, %r12d ; CHECK-APPLE: callq {{.*}}foo -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) +; CHECK-APPLE: movb %al, (%r14) ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller_with_multiple_swifterror_values: @@ -488,8 +488,8 @@ ; CHECK-i386: retl ; CHECK-APPLE-LABEL: empty_swiftcc: ; CHECK-APPLE: movl %edx, %ecx -; CHECK-APPLE: movl %edi, %eax ; CHECK-APPLE: movl %esi, %edx +; CHECK-APPLE: movl %edi, %eax ; CHECK-APPLE: retq define swiftcc {i32, i32, i32} @empty_swiftcc({i32, i32, i32} , %swift_error** swifterror %error_ptr_ref) { entry: Index: test/CodeGen/X86/system-intrinsics-xsetbv.ll =================================================================== --- test/CodeGen/X86/system-intrinsics-xsetbv.ll +++ test/CodeGen/X86/system-intrinsics-xsetbv.ll @@ -11,8 +11,8 @@ ; CHECK64-LABEL: test_xsetbv ; CHECK64: movl %edx, %eax -; CHECK64: movl %edi, %ecx ; CHECK64: movl %esi, %edx +; CHECK64: movl %edi, %ecx ; CHECK64: xsetbv ; CHECK64: ret 
Index: test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll @@ -40,10 +40,10 @@ ; X64-LABEL: test__blcic_u64: ; X64: # BB#0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: addq $1, %rdi -; X64-NEXT: andq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: addq $1, %rax +; X64-NEXT: andq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = add i64 %a0, 1 @@ -89,10 +89,10 @@ ; X64-LABEL: test__blsic_u64: ; X64: # BB#0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: subq $1, %rdi -; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: subq $1, %rax +; X64-NEXT: orq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = sub i64 %a0, 1 @@ -104,10 +104,10 @@ ; X64-LABEL: test__t1mskc_u64: ; X64: # BB#0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: addq $1, %rdi -; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: addq $1, %rax +; X64-NEXT: orq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = add i64 %a0, 1 @@ -119,10 +119,10 @@ ; X64-LABEL: test__tzmsk_u64: ; X64: # BB#0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: subq $1, %rdi -; X64-NEXT: andq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: subq $1, %rax +; X64-NEXT: andq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = sub i64 %a0, 1 Index: test/CodeGen/X86/tbm-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/tbm-intrinsics-fast-isel.ll +++ test/CodeGen/X86/tbm-intrinsics-fast-isel.ll @@ -72,10 +72,10 @@ ; X64-LABEL: 
test__blcic_u32: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: addl $1, %edi -; X64-NEXT: andl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: addl $1, %eax +; X64-NEXT: andl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = add i32 %a0, 1 @@ -154,10 +154,10 @@ ; X64-LABEL: test__blsic_u32: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: subl $1, %edi -; X64-NEXT: orl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: subl $1, %eax +; X64-NEXT: orl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = sub i32 %a0, 1 @@ -178,10 +178,10 @@ ; X64-LABEL: test__t1mskc_u32: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: addl $1, %edi -; X64-NEXT: orl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: addl $1, %eax +; X64-NEXT: orl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = add i32 %a0, 1 @@ -202,10 +202,10 @@ ; X64-LABEL: test__tzmsk_u32: ; X64: # BB#0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: subl $1, %edi -; X64-NEXT: andl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: subl $1, %eax +; X64-NEXT: andl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = sub i32 %a0, 1 Index: test/CodeGen/X86/tbm_patterns.ll =================================================================== --- test/CodeGen/X86/tbm_patterns.ll +++ test/CodeGen/X86/tbm_patterns.ll @@ -52,10 +52,10 @@ define i32 @test_x86_tbm_bextri_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u32_z2: ; CHECK: # BB#0: +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF -; CHECK-NEXT: cmovnel %edx, %esi -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: 
cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = lshr i32 %a, 4 %t1 = and i32 %t0, 4095 @@ -113,10 +113,10 @@ define i64 @test_x86_tbm_bextri_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u64_z2: ; CHECK: # BB#0: +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF -; CHECK-NEXT: cmovneq %rdx, %rsi -; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = lshr i64 %a, 4 %t1 = and i64 %t0, 4095 @@ -151,11 +151,11 @@ define i32 @test_x86_tbm_blcfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcfill_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: testl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: testl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = and i32 %t0, %a @@ -190,10 +190,10 @@ define i64 @test_x86_tbm_blcfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcfill_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: testq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: testq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = and i64 %t0, %a @@ -230,12 +230,12 @@ define i32 @test_x86_tbm_blci_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blci_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: 
retq %t0 = add i32 1, %a %t1 = xor i32 %t0, -1 @@ -273,11 +273,11 @@ define i64 @test_x86_tbm_blci_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blci_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 1, %a %t1 = xor i64 %t0, -1 @@ -335,12 +335,12 @@ define i32 @test_x86_tbm_blcic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcic_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: incl %edi -; CHECK-NEXT: testl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: incl %edi +; CHECK-NEXT: testl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, 1 @@ -378,12 +378,12 @@ define i64 @test_x86_tbm_blcic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcic_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: testq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: testq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, 1 @@ -419,11 +419,11 @@ define i32 @test_x86_tbm_blcmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcmsk_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: xorl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: leal 
1(%rdi), %ecx +; CHECK-NEXT: xorl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = xor i32 %t0, %a @@ -458,10 +458,10 @@ define i64 @test_x86_tbm_blcmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcmsk_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: xorq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: xorq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = xor i64 %t0, %a @@ -496,11 +496,11 @@ define i32 @test_x86_tbm_blcs_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcs_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = or i32 %t0, %a @@ -535,10 +535,10 @@ define i64 @test_x86_tbm_blcs_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcs_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = or i64 %t0, %a @@ -573,11 +573,11 @@ define i32 @test_x86_tbm_blsfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsfill_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal -1(%rdi), %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: leal -1(%rdi), %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax 
; CHECK-NEXT: retq %t0 = add i32 %a, -1 %t1 = or i32 %t0, %a @@ -612,10 +612,10 @@ define i64 @test_x86_tbm_blsfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsfill_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq -1(%rdi), %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq -1(%rdi), %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, -1 %t1 = or i64 %t0, %a @@ -652,12 +652,12 @@ define i32 @test_x86_tbm_blsic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsic_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: decl %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: orl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, -1 @@ -695,12 +695,12 @@ define i64 @test_x86_tbm_blsic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsic_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: decq %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: decq %rdi +; CHECK-NEXT: orq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, -1 @@ -739,12 +739,12 @@ define i32 @test_x86_tbm_t1mskc_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_t1mskc_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: incl %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: incl %edi +; 
CHECK-NEXT: orl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, 1 @@ -783,12 +783,12 @@ define i64 @test_x86_tbm_t1mskc_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_t1mskc_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: orq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, 1 @@ -827,12 +827,12 @@ define i32 @test_x86_tbm_tzmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_tzmsk_u32_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: decl %edi -; CHECK-NEXT: testl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: testl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, -1 @@ -871,12 +871,12 @@ define i64 @test_x86_tbm_tzmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_tzmsk_u64_z2: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: decq %rdi -; CHECK-NEXT: testq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: decq %rdi +; CHECK-NEXT: testq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, -1 Index: test/CodeGen/X86/twoaddr-lea.ll =================================================================== --- test/CodeGen/X86/twoaddr-lea.ll +++ test/CodeGen/X86/twoaddr-lea.ll @@ -11,8 +11,8 @@ define i32 @test1(i32 %X) nounwind { ; CHECK-LABEL: test1: -; 
CHECK-NOT: mov -; CHECK: leal 1(%rdi) +; CHECK-NOT: mov ??FAILING?: movl %edi, %eax +; CHECK: leal 1(%rax) %Z = add i32 %X, 1 store volatile i32 %Z, i32* @G ret i32 %X Index: test/CodeGen/X86/umul-with-overflow.ll =================================================================== --- test/CodeGen/X86/umul-with-overflow.ll +++ test/CodeGen/X86/umul-with-overflow.ll @@ -15,8 +15,8 @@ ; ; X64-LABEL: a: ; X64: # BB#0: -; X64-NEXT: movl $3, %ecx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $3, %ecx ; X64-NEXT: mull %ecx ; X64-NEXT: seto %al ; X64-NEXT: retq Index: test/CodeGen/X86/urem-power-of-two.ll =================================================================== --- test/CodeGen/X86/urem-power-of-two.ll +++ test/CodeGen/X86/urem-power-of-two.ll @@ -14,8 +14,8 @@ ; ; X64-LABEL: const_pow_2: ; X64: # BB#0: -; X64-NEXT: andl $31, %edi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andl $31, %eax ; X64-NEXT: retq %urem = urem i64 %x, 32 ret i64 %urem @@ -35,8 +35,9 @@ ; ; X64-LABEL: shift_left_pow_2: ; X64: # BB#0: -; X64-NEXT: movl $1, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: # kill: %CL %CL %ECX ; X64-NEXT: shll %cl, %eax ; X64-NEXT: addl $33554431, %eax # imm = 0x1FFFFFF ; X64-NEXT: andl %edi, %eax @@ -61,8 +62,9 @@ ; ; X64-LABEL: shift_right_pow_2: ; X64: # BB#0: -; X64-NEXT: movl $32768, %eax # imm = 0x8000 ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $32768, %eax # imm = 0x8000 +; X64-NEXT: # kill: %CL %CL %ECX ; X64-NEXT: shrl %cl, %eax ; X64-NEXT: decl %eax ; X64-NEXT: andl %edi, %eax Index: test/CodeGen/X86/use-add-flags.ll =================================================================== --- test/CodeGen/X86/use-add-flags.ll +++ test/CodeGen/X86/use-add-flags.ll @@ -10,16 +10,16 @@ define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { ; LNX-LABEL: test1: ; LNX: # BB#0: -; LNX-NEXT: addl (%rdi), %esi -; LNX-NEXT: cmovnsl %ecx, %edx ; LNX-NEXT: movl %edx, %eax +; LNX-NEXT: addl (%rdi), %esi +; LNX-NEXT: cmovnsl %ecx, %eax 
; LNX-NEXT: retq ; ; WIN-LABEL: test1: ; WIN: # BB#0: -; WIN-NEXT: addl (%rcx), %edx -; WIN-NEXT: cmovnsl %r9d, %r8d ; WIN-NEXT: movl %r8d, %eax +; WIN-NEXT: addl (%rcx), %edx +; WIN-NEXT: cmovnsl %r9d, %eax ; WIN-NEXT: retq %tmp2 = load i32, i32* %x, align 4 ; [#uses=1] %tmp4 = add i32 %tmp2, %y ; [#uses=1] Index: test/CodeGen/X86/vector-bitreverse.ll =================================================================== --- test/CodeGen/X86/vector-bitreverse.ll +++ test/CodeGen/X86/vector-bitreverse.ll @@ -11,38 +11,40 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind { ; SSE-LABEL: test_bitreverse_i8: ; SSE: # BB#0: -; SSE-NEXT: rolb $4, %dil -; SSE-NEXT: movl %edi, %eax -; SSE-NEXT: andb $51, %al -; SSE-NEXT: shlb $2, %al -; SSE-NEXT: andb $-52, %dil -; SSE-NEXT: shrb $2, %dil -; SSE-NEXT: orb %al, %dil -; SSE-NEXT: movl %edi, %eax -; SSE-NEXT: andb $85, %al -; SSE-NEXT: addb %al, %al -; SSE-NEXT: andb $-86, %dil -; SSE-NEXT: shrb %dil -; SSE-NEXT: orb %al, %dil ; SSE-NEXT: movl %edi, %eax +; SSE-NEXT: rolb $4, %al +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: andb $51, %cl +; SSE-NEXT: shlb $2, %cl +; SSE-NEXT: andb $-52, %al +; SSE-NEXT: shrb $2, %al +; SSE-NEXT: orb %cl, %al +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: andb $85, %cl +; SSE-NEXT: addb %cl, %cl +; SSE-NEXT: andb $-86, %al +; SSE-NEXT: shrb %al +; SSE-NEXT: orb %cl, %al +; SSE-NEXT: # kill: %AL %AL %EAX ; SSE-NEXT: retq ; ; AVX-LABEL: test_bitreverse_i8: ; AVX: # BB#0: -; AVX-NEXT: rolb $4, %dil -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: andb $51, %al -; AVX-NEXT: shlb $2, %al -; AVX-NEXT: andb $-52, %dil -; AVX-NEXT: shrb $2, %dil -; AVX-NEXT: orb %al, %dil -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: andb $85, %al -; AVX-NEXT: addb %al, %al -; AVX-NEXT: andb $-86, %dil -; AVX-NEXT: shrb %dil -; AVX-NEXT: orb %al, %dil ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: rolb $4, %al +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: andb $51, %cl +; AVX-NEXT: shlb $2, %cl +; AVX-NEXT: andb $-52, %al +; AVX-NEXT: shrb $2, %al +; 
AVX-NEXT: orb %cl, %al +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: andb $85, %cl +; AVX-NEXT: addb %cl, %cl +; AVX-NEXT: andb $-86, %al +; AVX-NEXT: shrb %al +; AVX-NEXT: orb %cl, %al +; AVX-NEXT: # kill: %AL %AL %EAX ; AVX-NEXT: retq ; ; XOP-LABEL: test_bitreverse_i8: Index: test/CodeGen/X86/vector-blend.ll =================================================================== --- test/CodeGen/X86/vector-blend.ll +++ test/CodeGen/X86/vector-blend.ll @@ -358,30 +358,30 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { ; SSE2-LABEL: vsel_double8: ; SSE2: # BB#0: # %entry +; SSE2-NEXT: movaps %xmm7, %xmm3 +; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSE2-NEXT: movapd %xmm4, %xmm0 -; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movapd %xmm6, %xmm2 -; SSE2-NEXT: movaps %xmm7, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_double8: ; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movaps %xmm7, %xmm3 +; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSSE3-NEXT: movapd %xmm4, %xmm0 -; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movapd %xmm6, %xmm2 -; SSSE3-NEXT: movaps %xmm7, %xmm3 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_double8: ; SSE41: # BB#0: # %entry +; SSE41-NEXT: movaps %xmm7, %xmm3 +; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1] ; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1] -; SSE41-NEXT: movaps %xmm5, %xmm1 -; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq ; ; AVX-LABEL: vsel_double8: @@ -397,30 +397,30 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) { ; SSE2-LABEL: vsel_i648: ; SSE2: # BB#0: # %entry +; SSE2-NEXT: movaps %xmm7, %xmm3 +; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSE2-NEXT: movapd %xmm4, %xmm0 -; 
SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movapd %xmm6, %xmm2 -; SSE2-NEXT: movaps %xmm7, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_i648: ; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movaps %xmm7, %xmm3 +; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSSE3-NEXT: movapd %xmm4, %xmm0 -; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movapd %xmm6, %xmm2 -; SSSE3-NEXT: movaps %xmm7, %xmm3 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_i648: ; SSE41: # BB#0: # %entry +; SSE41-NEXT: movaps %xmm7, %xmm3 +; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7] ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7] -; SSE41-NEXT: movaps %xmm5, %xmm1 -; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq ; ; AVX1-LABEL: vsel_i648: @@ -554,22 +554,22 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) { ; SSE2-LABEL: constant_blendvpd_avx: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSE2-NEXT: movapd %xmm3, %xmm1 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: constant_blendvpd_avx: ; SSSE3: # BB#0: # %entry -; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSSE3-NEXT: movaps %xmm2, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSSE3-NEXT: movapd %xmm3, %xmm1 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: constant_blendvpd_avx: ; SSE41: # BB#0: # %entry -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1] ; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1] ; SSE41-NEXT: retq ; ; AVX-LABEL: constant_blendvpd_avx: @@ -767,20 +767,20 @@ define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) { ; SSE2-LABEL: blend_shufflevector_4xi64: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: movaps %xmm3, %xmm1 
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: blend_shufflevector_4xi64: ; SSSE3: # BB#0: # %entry -; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: movaps %xmm3, %xmm1 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: blend_shufflevector_4xi64: ; SSE41: # BB#0: # %entry -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: movaps %xmm3, %xmm1 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: blend_shufflevector_4xi64: Index: test/CodeGen/X86/vector-compare-results.ll =================================================================== --- test/CodeGen/X86/vector-compare-results.ll +++ test/CodeGen/X86/vector-compare-results.ll @@ -344,210 +344,210 @@ define <32 x i1> @test_cmp_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i8: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 
2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; 
SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq 
%rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i8: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE42-NEXT: pcmpgtb %xmm3, %xmm1 -; SSE42-NEXT: pextrb $15, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: 
pextrb $13, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $11, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $9, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $7, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $3, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $15, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $13, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $11, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $9, 
%xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $7, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $3, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $15, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $13, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $11, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $9, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $7, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: 
pextrb $5, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $3, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $15, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $13, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $11, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $9, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $7, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $5, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $3, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx +; 
SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i8: @@ -885,6 +885,7 @@ define <32 x i1> @test_cmp_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i16: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtw %xmm5, %xmm1 ; SSE2-NEXT: pcmpgtw %xmm4, %xmm0 ; SSE2-NEXT: packsswb %xmm1, %xmm0 @@ -892,209 +893,208 @@ ; SSE2-NEXT: pcmpgtw %xmm6, %xmm2 ; SSE2-NEXT: packsswb %xmm3, %xmm2 ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, 
%al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) 
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl 
+; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i16: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtw %xmm4, %xmm0 ; SSE42-NEXT: pcmpgtw %xmm5, %xmm1 ; SSE42-NEXT: pcmpgtw %xmm6, %xmm2 ; SSE42-NEXT: pcmpgtw %xmm7, %xmm3 -; SSE42-NEXT: pextrb $14, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, 
%xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; 
SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, 
%xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i16: @@ -1155,412 +1155,413 @@ define <64 x i1> @test_cmp_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; SSE2-LABEL: test_cmp_v64i8: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb 
%xmm4, %xmm0 ; SSE2-NEXT: pcmpgtb %xmm5, %xmm1 ; SSE2-NEXT: pcmpgtb %xmm6, %xmm2 ; SSE2-NEXT: pcmpgtb %xmm7, %xmm3 ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 6(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb 
-{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 6(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; 
SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 4(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb 
-{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 4(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; 
SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 
(%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) 
+; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v64i8: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb %xmm4, %xmm0 ; SSE42-NEXT: pcmpgtb %xmm5, %xmm1 ; SSE42-NEXT: pcmpgtb %xmm6, %xmm2 ; SSE42-NEXT: pcmpgtb %xmm7, %xmm3 -; SSE42-NEXT: pextrb $15, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $14, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $13, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $12, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $11, %xmm3, %eax -; SSE42-NEXT: andb 
$1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $10, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $9, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $8, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $7, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $6, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $5, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $4, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $3, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $2, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $1, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $0, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $15, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $14, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $13, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $11, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $9, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $7, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; 
SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $6, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $5, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $3, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $1, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $15, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $13, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $11, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $9, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $7, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $3, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: 
movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $15, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $13, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $11, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $9, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $7, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $3, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $15, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; 
SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $13, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $11, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $10, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $9, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $7, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $6, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $5, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $3, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $2, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $1, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $15, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $13, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $11, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; 
SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $10, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $9, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $7, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $6, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $5, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $3, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $1, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $15, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $13, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $11, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $9, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $7, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: 
movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $5, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $3, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $15, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $13, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $11, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $9, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $7, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $5, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $3, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; 
SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v64i8: ; AVX1: # BB#0: +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 @@ -1569,401 +1570,400 @@ ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpextrb $15, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andb $1, 
%al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: 
vpextrb $13, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $11, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $9, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $7, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $5, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $3, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $1, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $15, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $13, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $11, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $9, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb 
%al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $7, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $5, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $3, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $1, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: vpextrb $15, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $13, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $11, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $9, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $7, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $5, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: 
vpextrb $4, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $3, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $1, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $15, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $13, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $11, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $9, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $7, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $5, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $3, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $1, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; 
AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $15, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $13, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $11, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $9, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $7, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $5, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $3, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $1, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $15, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $13, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $11, %xmm4, %ecx 
+; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $9, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $7, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $5, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $3, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $1, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v64i8: ; AVX2: # BB#0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $15, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm2, %eax -; 
AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $15, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 
4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) +; AVX2-NEXT: vpextrb $15, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $13, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $11, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $9, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $7, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $5, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $3, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $1, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm2, %ecx +; 
AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $15, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $13, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $11, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $9, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $7, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $3, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpextrb $15, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $13, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; 
AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $11, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $9, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $7, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $3, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $15, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $13, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $11, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $9, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: 
vpextrb $7, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $5, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $3, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $1, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: vpextrb $15, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $13, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $11, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $9, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $7, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $3, %xmm1, %ecx +; AVX2-NEXT: andb 
$1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $15, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $13, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $11, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $9, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $7, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $5, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $3, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $1, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -2190,6 +2190,7 @@ define <32 x i1> @test_cmp_v32f32(<32 x float> %a0, <32 x float> 
%a1) nounwind { ; SSE2-LABEL: test_cmp_v32f32: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9 ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10 @@ -2242,108 +2243,108 @@ ; SSE2-NEXT: pand %xmm3, %xmm4 ; SSE2-NEXT: packuswb %xmm2, %xmm4 ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb 
-{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: 
andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb 
-{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32f32: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm15 ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm14 ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm13 @@ -2360,103 +2361,102 @@ ; SSE42-NEXT: cmpltps %xmm5, %xmm13 ; SSE42-NEXT: cmpltps %xmm6, %xmm14 ; SSE42-NEXT: cmpltps %xmm7, %xmm15 -; SSE42-NEXT: pextrb $12, %xmm15, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm15, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm15, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm15, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm14, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm14, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm14, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm14, %eax -; 
SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm13, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm13, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm13, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm13, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm12, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm12, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm12, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm12, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm11, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm11, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm11, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm11, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm10, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm10, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm10, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm10, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm9, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm9, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm9, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm9, %eax -; 
SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm8, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm8, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm8, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm8, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $12, %xmm15, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm15, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm15, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm15, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm14, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm14, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm14, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm14, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm13, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm13, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm13, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm13, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm12, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm12, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm12, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: 
pextrb $0, %xmm12, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm11, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm11, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm11, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm11, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm10, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm10, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm10, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm10, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm9, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm9, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm9, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm9, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm8, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm8, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm8, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm8, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32f32: @@ -3404,6 +3404,7 @@ define <32 x i1> @test_cmp_v32i32(<32 x i32> %a0, <32 x i32> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i32: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm3 ; SSE2-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm2 ; SSE2-NEXT: 
packssdw %xmm3, %xmm2 @@ -3419,225 +3420,224 @@ ; SSE2-NEXT: packssdw %xmm5, %xmm4 ; SSE2-NEXT: packsswb %xmm6, %xmm4 ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; 
SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax -; SSE2-NEXT: retq -; -; SSE42-LABEL: test_cmp_v32i32: -; SSE42: # BB#0: -; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm0 -; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm1 -; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm2 -; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm3 -; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm4 -; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm5 -; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm6 -; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm7 -; SSE42-NEXT: pextrb $12, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 
2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; 
SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax -; SSE42-NEXT: retq -; -; AVX1-LABEL: test_cmp_v32i32: -; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm9 -; AVX1-NEXT: vpcmpgtd %xmm8, %xmm9, %xmm8 -; AVX1-NEXT: vpcmpgtd %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpacksswb %xmm8, %xmm3, %xmm8 -; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 -; AVX1-NEXT: vpcmpgtd %xmm7, %xmm3, %xmm3 +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, 
%cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; 
SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: retq +; +; SSE42-LABEL: test_cmp_v32i32: +; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm0 +; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm1 +; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm2 +; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm3 +; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm4 +; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm5 +; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm6 +; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm7 +; SSE42-NEXT: pextrb $12, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, 
%xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; 
SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: retq +; +; AVX1-LABEL: test_cmp_v32i32: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm9 +; AVX1-NEXT: vpcmpgtd %xmm8, %xmm9, %xmm8 +; AVX1-NEXT: vpcmpgtd %xmm7, %xmm3, %xmm3 +; AVX1-NEXT: vpacksswb %xmm8, %xmm3, %xmm8 +; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 +; AVX1-NEXT: vpcmpgtd %xmm7, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpgtd %xmm6, %xmm2, %xmm2 ; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpacksswb %xmm8, %xmm2, %xmm2 @@ -4330,6 +4330,7 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind { ; SSE2-LABEL: test_cmp_v64i16: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm1 ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: packsswb %xmm1, %xmm0 @@ -4343,206 +4344,206 @@ ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm6 ; SSE2-NEXT: packsswb %xmm7, %xmm6 ; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al 
-; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 6(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb 
-{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 6(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; 
SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 4(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 4(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) ; SSE2-NEXT: movdqa 
%xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: 
andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 
(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb 
$1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v64i16: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm1 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm2 @@ -4551,211 +4552,211 @@ ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm5 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm6 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm7 -; SSE42-NEXT: pextrb $14, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $12, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $10, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $8, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $6, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $4, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $2, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $0, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $14, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $12, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $10, %xmm6, %eax -; SSE42-NEXT: andb $1, 
%al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $8, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $6, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $4, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $2, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $0, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $14, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $12, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $10, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $8, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $6, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $4, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $2, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $0, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $14, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $12, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $10, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $8, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $6, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $4, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $2, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; 
SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $0, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $14, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: 
movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax -; SSE42-NEXT: retq -; -; AVX1-LABEL: test_cmp_v64i16: -; AVX1: # BB#0: -; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm8 -; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm5 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm1 +; SSE42-NEXT: pextrb $14, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $12, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $10, %xmm7, %ecx +; SSE42-NEXT: 
andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $8, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $6, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $4, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $2, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $0, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $14, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $12, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $10, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $8, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $6, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $4, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $2, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $0, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $14, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $12, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $10, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $8, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $6, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $4, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $2, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl 
+; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $0, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $14, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $12, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $10, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $8, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $6, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $4, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $2, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $0, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; 
SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 
(%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: retq +; +; AVX1-LABEL: test_cmp_v64i16: +; AVX1: # BB#0: +; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm8 +; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm5 ; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm6 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 @@ -4764,204 +4765,204 @@ ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vpcmpgtw %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpextrb $14, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; 
AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm6, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm5, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, 4(%rdi) -; 
AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm0, %eax -; AVX1-NEXT: 
andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm8, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: vpextrb $14, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm3, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx 
+; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm6, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) 
+; AVX1-NEXT: vpextrb $4, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm5, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm4, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm0, %ecx +; 
AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm8, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: movb %cl, (%rax) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v64i16: ; AVX2: # BB#0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 ; AVX2-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 @@ -4970,199 +4971,198 @@ ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm6 ; AVX2-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3 ; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm7 -; AVX2-NEXT: vpextrb $14, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: 
movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm7, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: 
vpextrb $2, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm5, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: 
andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; 
AVX2-NEXT: vpextrb $0, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: vpextrb $14, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm7, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm3, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; 
AVX2-NEXT: vpextrb $8, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm5, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm5, %ecx +; 
AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, 
(%rax) +; AVX2-NEXT: vpextrb $6, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: movb %cl, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -6157,6 +6157,7 @@ ; SSE2-LABEL: test_cmp_v128i8: ; SSE2: # BB#0: ; SSE2-NEXT: pushq %rax +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm1 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm2 @@ -6166,403 +6167,403 @@ ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm6 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm7 ; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb 
-{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 14(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 14(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), 
%dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 14(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 14(%rax) ; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 12(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 12(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 
12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 12(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 12(%rax) ; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; 
SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 10(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 10(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, 
%cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 10(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 10(%rax) ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 
8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 8(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 8(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 8(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 8(%rax) +; SSE2-NEXT: andb $1, %cl +; 
SSE2-NEXT: movb %cl, 8(%rax) ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 6(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 6(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; 
SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 6(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 6(%rax) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), 
%al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 4(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 4(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: 
movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 4(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 4(%rax) ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; 
SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; 
SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl 
+; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: popq %rcx ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v128i8: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm1 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm2 @@ -6571,395 +6572,395 @@ ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm5 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm6 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm7 -; SSE42-NEXT: pextrb $15, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $14, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $13, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $12, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; 
SSE42-NEXT: pextrb $11, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $10, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $9, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $8, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $7, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $6, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $5, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $4, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $3, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $2, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $1, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $0, %xmm7, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 14(%rdi) -; SSE42-NEXT: pextrb $15, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $14, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $13, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $12, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $11, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $10, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $9, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $8, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 
12(%rdi) -; SSE42-NEXT: pextrb $7, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $6, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $5, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $4, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $3, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $2, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $1, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $0, %xmm6, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 12(%rdi) -; SSE42-NEXT: pextrb $15, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $14, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $13, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $12, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $11, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $10, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $9, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $8, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $7, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $6, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $5, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $4, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: 
movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $3, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $2, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $1, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $0, %xmm5, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 10(%rdi) -; SSE42-NEXT: pextrb $15, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $14, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $13, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $12, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $11, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $10, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $9, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $8, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $7, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $6, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $5, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $4, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $3, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $2, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $1, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 8(%rdi) -; SSE42-NEXT: pextrb $0, %xmm4, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb 
%al, 8(%rdi) -; SSE42-NEXT: pextrb $15, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $14, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $13, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $12, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $11, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $10, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $9, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $8, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $7, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $6, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $5, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $4, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $3, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $2, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $1, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $0, %xmm3, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 6(%rdi) -; SSE42-NEXT: pextrb $15, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $14, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $13, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 
4(%rdi) -; SSE42-NEXT: pextrb $11, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $9, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $7, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $6, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $5, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $3, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $1, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 4(%rdi) -; SSE42-NEXT: pextrb $15, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $13, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $11, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $9, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; 
SSE42-NEXT: pextrb $7, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $3, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm1, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $15, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $13, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $11, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $9, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $7, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $3, 
%xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $15, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $14, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $13, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $12, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $11, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $10, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $9, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $8, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $7, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $6, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $5, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $4, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $3, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $2, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $1, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 14(%rax) +; SSE42-NEXT: pextrb $0, %xmm7, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 
14(%rax) +; SSE42-NEXT: pextrb $15, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $14, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $13, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $12, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $11, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $10, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $9, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $8, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $7, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $6, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $5, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $4, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $3, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $2, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $1, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $0, %xmm6, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 12(%rax) +; SSE42-NEXT: pextrb $15, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $14, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $13, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $12, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: 
movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $11, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $10, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $9, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $8, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $7, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $6, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $5, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $4, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $3, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $2, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $1, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $0, %xmm5, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 10(%rax) +; SSE42-NEXT: pextrb $15, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $14, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $13, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $12, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $11, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $10, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $9, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $8, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; 
SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $7, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $6, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $5, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $4, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $3, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $2, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $1, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $0, %xmm4, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 8(%rax) +; SSE42-NEXT: pextrb $15, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $13, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $11, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $10, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $9, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $7, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $6, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $5, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: 
movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $3, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $2, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $1, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 6(%rax) +; SSE42-NEXT: pextrb $15, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $13, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $11, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $10, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $9, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $7, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $6, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $5, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $3, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $1, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 4(%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 
4(%rax) +; SSE42-NEXT: pextrb $15, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $13, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $11, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $9, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $7, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $5, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $3, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $15, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $13, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; 
SSE42-NEXT: pextrb $11, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $9, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $7, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $5, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $3, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v128i8: ; AVX1: # BB#0: +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm0, %xmm8 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 @@ -6976,794 +6977,794 @@ ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vpcmpgtb %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpextrb $15, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm3, %eax -; 
AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; 
AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm6, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 12(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: 
vpextrb $2, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm5, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 8(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andb $1, %al 
-; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: 
vpextrb $9, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $0, %xmm4, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $13, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $11, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $9, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $7, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $5, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; 
AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $3, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $1, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $15, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $13, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $11, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $9, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $7, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $5, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $3, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $1, %xmm8, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $0, %xmm8, %eax -; 
AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: vpextrb $15, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $14, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $13, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $12, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $11, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $10, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $9, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $8, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $7, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $6, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $5, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $4, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $3, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $2, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $1, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $0, %xmm3, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $15, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $14, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $13, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $12, %xmm6, %ecx +; 
AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $11, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $10, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $9, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $8, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $7, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $6, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $5, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $4, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $3, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $2, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $1, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $0, %xmm6, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 12(%rax) +; AVX1-NEXT: vpextrb $15, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $14, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $13, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $12, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $11, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $10, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $9, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $8, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: 
movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $7, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $6, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $5, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $4, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $3, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $2, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $1, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $0, %xmm2, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $15, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $14, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $13, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $12, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $11, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $10, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $9, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $8, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $7, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $6, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $5, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $4, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $3, %xmm5, 
%ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $2, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $1, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $0, %xmm5, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 8(%rax) +; AVX1-NEXT: vpextrb $15, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $13, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $11, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $9, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $7, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $5, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $3, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $1, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm1, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $15, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb 
%cl, 4(%rax) +; AVX1-NEXT: vpextrb $14, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $13, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $12, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $11, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $10, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $9, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $8, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $7, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $6, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $5, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $4, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $3, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $1, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $0, %xmm4, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, 4(%rax) +; AVX1-NEXT: vpextrb $15, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $13, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $11, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm0, %ecx +; 
AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $9, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $7, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $5, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $3, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $1, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm0, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $15, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $14, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $13, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $12, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $11, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $10, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $9, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $8, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $7, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $6, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: 
vpextrb $5, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $4, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $3, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $2, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $1, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) +; AVX1-NEXT: vpextrb $0, %xmm8, %ecx +; AVX1-NEXT: andb $1, %cl +; AVX1-NEXT: movb %cl, (%rax) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v128i8: ; AVX2: # BB#0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 ; AVX2-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 ; AVX2-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 ; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX2-NEXT: vpextrb $15, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb 
$4, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm4, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $15, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm3, %eax -; AVX2-NEXT: 
andb $1, %al -; AVX2-NEXT: movb %al, 12(%rdi) +; AVX2-NEXT: vpextrb $15, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $14, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $13, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $12, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $11, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $10, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $9, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $8, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $7, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $6, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $5, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $4, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $3, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $2, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $1, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $0, %xmm4, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $15, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $14, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $13, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $12, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: 
movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $11, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $10, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $9, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $8, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $7, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $6, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $5, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $4, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $3, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $2, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $1, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) +; AVX2-NEXT: vpextrb $0, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 12(%rax) ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX2-NEXT: vpextrb $15, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; 
AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm3, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $15, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb 
$3, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 8(%rdi) +; AVX2-NEXT: vpextrb $15, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $14, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $13, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $12, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $11, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $10, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $9, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $8, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $7, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $6, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $5, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $4, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $3, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $2, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $1, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $0, %xmm3, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $15, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; 
AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $14, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $13, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $12, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $11, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $10, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $9, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $8, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $7, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $6, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $5, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $4, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $3, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $2, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $1, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) +; AVX2-NEXT: vpextrb $0, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 8(%rax) ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $15, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; 
AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm2, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $15, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb 
$6, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, 4(%rdi) +; AVX2-NEXT: vpextrb $15, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $13, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $11, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $9, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $7, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $5, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $3, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; 
AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $1, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm2, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $15, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $13, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $11, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $9, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $7, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $3, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, 4(%rax) ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpextrb $15, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; 
AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $13, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $11, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $9, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $7, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $3, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $15, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $13, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $11, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $9, %xmm0, %eax -; 
AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $7, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $5, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $3, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $1, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $0, %xmm0, %eax -; AVX2-NEXT: andb $1, %al -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: vpextrb $15, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $13, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $11, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $9, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $7, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb 
%cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $3, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $15, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $14, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $13, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $12, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $11, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $10, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $9, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $8, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $7, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $6, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $5, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $4, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $3, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $2, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $1, %xmm0, %ecx +; AVX2-NEXT: andb $1, %cl +; AVX2-NEXT: movb %cl, (%rax) +; AVX2-NEXT: vpextrb $0, %xmm0, %ecx +; AVX2-NEXT: andb $1, 
%cl +; AVX2-NEXT: movb %cl, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512F-LABEL: test_cmp_v128i8: ; AVX512F: # BB#0: +; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 @@ -7772,44 +7773,44 @@ ; AVX512F-NEXT: vpmovsxbd %xmm4, %zmm4 ; AVX512F-NEXT: vpslld $31, %zmm4, %zmm4 ; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0 -; AVX512F-NEXT: kmovw %k0, 14(%rdi) +; AVX512F-NEXT: kmovw %k0, 14(%rax) ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512F-NEXT: kmovw %k0, 12(%rdi) +; AVX512F-NEXT: kmovw %k0, 12(%rax) ; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512F-NEXT: kmovw %k0, 10(%rdi) +; AVX512F-NEXT: kmovw %k0, 10(%rax) ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, 8(%rdi) +; AVX512F-NEXT: kmovw %k0, 8(%rax) ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, 6(%rdi) +; AVX512F-NEXT: kmovw %k0, 6(%rax) ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, 4(%rdi) +; AVX512F-NEXT: kmovw %k0, 4(%rax) ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, 2(%rdi) +; AVX512F-NEXT: kmovw %k0, 2(%rax) ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rdi) -; 
AVX512F-NEXT: movq %rdi, %rax +; AVX512F-NEXT: kmovw %k0, (%rax) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v128i8: ; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: movq %rdi, %rax ; AVX512DQ-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 @@ -7818,39 +7819,38 @@ ; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm4 ; AVX512DQ-NEXT: vpslld $31, %zmm4, %zmm4 ; AVX512DQ-NEXT: vptestmd %zmm4, %zmm4, %k0 -; AVX512DQ-NEXT: kmovw %k0, 14(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 14(%rax) ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, 12(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 12(%rax) ; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, 10(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 10(%rax) ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 8(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 8(%rax) ; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 6(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 6(%rax) ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 4(%rax) ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) +; AVX512DQ-NEXT: kmovw %k0, 2(%rax) ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; 
AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512DQ-NEXT: kmovw %k0, (%rdi) -; AVX512DQ-NEXT: movq %rdi, %rax +; AVX512DQ-NEXT: kmovw %k0, (%rax) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -7872,6 +7872,7 @@ define <32 x i1> @test_cmp_v32f64(<32 x double> %a0, <32 x double> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32f64: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: cmpltpd %xmm5, %xmm8 ; SSE2-NEXT: movapd {{[0-9]+}}(%rsp), %xmm5 @@ -7956,104 +7957,103 @@ ; SSE2-NEXT: andpd %xmm2, %xmm1 ; SSE2-NEXT: packuswb %xmm3, %xmm1 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; 
SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; 
SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl 
+; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32f64: @@ -8064,6 +8064,7 @@ ; SSE42-NEXT: pushq %r13 ; SSE42-NEXT: pushq %r12 ; SSE42-NEXT: pushq %rbx +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8 ; SSE42-NEXT: cmpltpd %xmm0, %xmm8 ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 @@ -8095,108 +8096,107 @@ ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm7 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm7 ; SSE42-NEXT: pextrb $8, %xmm7, %r12d -; SSE42-NEXT: pextrb $0, %xmm7, %ecx +; SSE42-NEXT: pextrb $0, %xmm7, %edx ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm7 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm7 ; SSE42-NEXT: pextrb $8, %xmm7, %r11d -; SSE42-NEXT: pextrb $0, %xmm7, %edx +; SSE42-NEXT: pextrb $0, %xmm7, %esi ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm7 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm7 ; 
SSE42-NEXT: pextrb $8, %xmm7, %r9d -; SSE42-NEXT: pextrb $0, %xmm7, %esi +; SSE42-NEXT: pextrb $0, %xmm7, %edi ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm7 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm7 -; SSE42-NEXT: pextrb $8, %xmm7, %eax -; SSE42-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill +; SSE42-NEXT: pextrb $8, %xmm7, %ecx +; SSE42-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill ; SSE42-NEXT: pextrb $0, %xmm7, %r10d ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm7 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm7 ; SSE42-NEXT: andb $1, %r8b -; SSE42-NEXT: movb %r8b, 2(%rdi) +; SSE42-NEXT: movb %r8b, 2(%rax) ; SSE42-NEXT: andb $1, %r13b -; SSE42-NEXT: movb %r13b, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm7, %eax -; SSE42-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill +; SSE42-NEXT: movb %r13b, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm7, %ecx +; SSE42-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill ; SSE42-NEXT: pextrb $0, %xmm7, %r13d ; SSE42-NEXT: andb $1, %r14b -; SSE42-NEXT: movb %r14b, 2(%rdi) +; SSE42-NEXT: movb %r14b, 2(%rax) ; SSE42-NEXT: andb $1, %bl -; SSE42-NEXT: movb %bl, 2(%rdi) +; SSE42-NEXT: movb %bl, 2(%rax) ; SSE42-NEXT: pextrb $8, %xmm6, %r14d ; SSE42-NEXT: pextrb $0, %xmm6, %r8d ; SSE42-NEXT: andb $1, %r15b -; SSE42-NEXT: movb %r15b, 2(%rdi) +; SSE42-NEXT: movb %r15b, 2(%rax) ; SSE42-NEXT: andb $1, %bpl -; SSE42-NEXT: movb %bpl, 2(%rdi) +; SSE42-NEXT: movb %bpl, 2(%rax) ; SSE42-NEXT: pextrb $8, %xmm5, %r15d ; SSE42-NEXT: pextrb $0, %xmm5, %ebp ; SSE42-NEXT: andb $1, %r12b -; SSE42-NEXT: movb %r12b, 2(%rdi) -; SSE42-NEXT: andb $1, %cl -; SSE42-NEXT: movb %cl, 2(%rdi) +; SSE42-NEXT: movb %r12b, 2(%rax) +; SSE42-NEXT: andb $1, %dl +; SSE42-NEXT: movb %dl, 2(%rax) ; SSE42-NEXT: pextrb $8, %xmm4, %r12d ; SSE42-NEXT: pextrb $0, %xmm4, %ebx ; SSE42-NEXT: andb $1, %r11b -; SSE42-NEXT: movb %r11b, 2(%rdi) -; SSE42-NEXT: andb $1, %dl -; SSE42-NEXT: movb %dl, 2(%rdi) +; SSE42-NEXT: movb %r11b, 2(%rax) +; SSE42-NEXT: andb $1, %sil +; SSE42-NEXT: movb %sil, 
2(%rax) ; SSE42-NEXT: pextrb $8, %xmm3, %r11d -; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: pextrb $0, %xmm3, %edx ; SSE42-NEXT: andb $1, %r9b -; SSE42-NEXT: movb %r9b, 2(%rdi) -; SSE42-NEXT: andb $1, %sil -; SSE42-NEXT: movb %sil, 2(%rdi) +; SSE42-NEXT: movb %r9b, 2(%rax) +; SSE42-NEXT: andb $1, %dil +; SSE42-NEXT: movb %dil, 2(%rax) ; SSE42-NEXT: pextrb $8, %xmm2, %r9d -; SSE42-NEXT: pextrb $0, %xmm2, %edx -; SSE42-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) +; SSE42-NEXT: pextrb $0, %xmm2, %esi +; SSE42-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # 4-byte Reload +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) ; SSE42-NEXT: andb $1, %r10b -; SSE42-NEXT: movb %r10b, 2(%rdi) +; SSE42-NEXT: movb %r10b, 2(%rax) ; SSE42-NEXT: pextrb $8, %xmm1, %r10d -; SSE42-NEXT: pextrb $0, %xmm1, %esi -; SSE42-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) +; SSE42-NEXT: pextrb $0, %xmm1, %edi +; SSE42-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # 4-byte Reload +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) ; SSE42-NEXT: andb $1, %r13b -; SSE42-NEXT: movb %r13b, 2(%rdi) +; SSE42-NEXT: movb %r13b, 2(%rax) ; SSE42-NEXT: pextrb $8, %xmm0, %r13d -; SSE42-NEXT: pextrb $0, %xmm0, %eax +; SSE42-NEXT: pextrb $0, %xmm0, %ecx ; SSE42-NEXT: andb $1, %r14b -; SSE42-NEXT: movb %r14b, (%rdi) +; SSE42-NEXT: movb %r14b, (%rax) ; SSE42-NEXT: andb $1, %r8b -; SSE42-NEXT: movb %r8b, (%rdi) +; SSE42-NEXT: movb %r8b, (%rax) ; SSE42-NEXT: pextrb $8, %xmm8, %r8d ; SSE42-NEXT: pextrb $0, %xmm8, %r14d ; SSE42-NEXT: andb $1, %r15b -; SSE42-NEXT: movb %r15b, (%rdi) +; SSE42-NEXT: movb %r15b, (%rax) ; SSE42-NEXT: andb $1, %bpl -; SSE42-NEXT: movb %bpl, (%rdi) +; SSE42-NEXT: movb %bpl, (%rax) ; SSE42-NEXT: andb $1, %r12b -; SSE42-NEXT: movb %r12b, (%rdi) +; SSE42-NEXT: movb %r12b, (%rax) ; SSE42-NEXT: andb $1, %bl -; SSE42-NEXT: movb %bl, (%rdi) +; SSE42-NEXT: movb 
%bl, (%rax) ; SSE42-NEXT: andb $1, %r11b -; SSE42-NEXT: movb %r11b, (%rdi) -; SSE42-NEXT: andb $1, %cl -; SSE42-NEXT: movb %cl, (%rdi) -; SSE42-NEXT: andb $1, %r9b -; SSE42-NEXT: movb %r9b, (%rdi) +; SSE42-NEXT: movb %r11b, (%rax) ; SSE42-NEXT: andb $1, %dl -; SSE42-NEXT: movb %dl, (%rdi) -; SSE42-NEXT: andb $1, %r10b -; SSE42-NEXT: movb %r10b, (%rdi) +; SSE42-NEXT: movb %dl, (%rax) +; SSE42-NEXT: andb $1, %r9b +; SSE42-NEXT: movb %r9b, (%rax) ; SSE42-NEXT: andb $1, %sil -; SSE42-NEXT: movb %sil, (%rdi) +; SSE42-NEXT: movb %sil, (%rax) +; SSE42-NEXT: andb $1, %r10b +; SSE42-NEXT: movb %r10b, (%rax) +; SSE42-NEXT: andb $1, %dil +; SSE42-NEXT: movb %dil, (%rax) ; SSE42-NEXT: andb $1, %r13b -; SSE42-NEXT: movb %r13b, (%rdi) -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) +; SSE42-NEXT: movb %r13b, (%rax) +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: andb $1, %r8b -; SSE42-NEXT: movb %r8b, (%rdi) +; SSE42-NEXT: movb %r8b, (%rax) ; SSE42-NEXT: andb $1, %r14b -; SSE42-NEXT: movb %r14b, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: movb %r14b, (%rax) ; SSE42-NEXT: popq %rbx ; SSE42-NEXT: popq %r12 ; SSE42-NEXT: popq %r13 @@ -8979,6 +8979,7 @@ define <32 x i1> @test_cmp_v32i64(<32 x i64> %a0, <32 x i64> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i64: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0] ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 ; SSE2-NEXT: pxor %xmm8, %xmm9 @@ -9213,108 +9214,108 @@ ; SSE2-NEXT: andpd %xmm10, %xmm1 ; SSE2-NEXT: packuswb %xmm4, %xmm1 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, 
%al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, (%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, (%rdi) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, 
%cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, (%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, (%rax) ; SSE2-NEXT: movdqa %xmm9, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; 
SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-NEXT: andb $1, %cl -; SSE2-NEXT: movb %cl, 2(%rdi) -; SSE2-NEXT: andb $1, %al -; SSE2-NEXT: movb %al, 2(%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; 
SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movb %dl, 2(%rax) +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movb %cl, 2(%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i64: ; SSE42: # BB#0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10 ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11 @@ -9373,103 +9374,102 @@ ; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm9[4,5,6,7] ; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm8[0,1],xmm0[2,3,4,5,6,7] ; SSE42-NEXT: packsswb %xmm3, %xmm0 -; SSE42-NEXT: pextrb $15, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $14, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $13, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $12, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $11, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $9, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $7, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $6, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $3, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: 
pextrb $1, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $0, %xmm0, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, 2(%rdi) -; SSE42-NEXT: pextrb $15, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $14, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $13, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $12, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $11, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $9, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $7, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $6, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $5, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $4, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $3, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $1, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: pextrb $0, %xmm2, %eax -; SSE42-NEXT: andb $1, %al -; SSE42-NEXT: movb %al, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $15, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; 
SSE42-NEXT: pextrb $13, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $11, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $9, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $7, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $6, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $5, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $3, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, 2(%rax) +; SSE42-NEXT: pextrb $15, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $13, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $11, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $10, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: 
pextrb $9, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $8, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $7, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $6, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $5, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $4, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $3, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $1, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) +; SSE42-NEXT: pextrb $0, %xmm2, %ecx +; SSE42-NEXT: andb $1, %cl +; SSE42-NEXT: movb %cl, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i64: Index: test/CodeGen/X86/vector-interleave.ll =================================================================== --- test/CodeGen/X86/vector-interleave.ll +++ test/CodeGen/X86/vector-interleave.ll @@ -10,6 +10,7 @@ define <64 x i16> @interleave8x8(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e, <8 x i16> %f, <8 x i16> %h, <8 x i16> %g) { ; SSE-LABEL: interleave8x8: ; SSE: # BB#0: +; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: movdqa %xmm0, %xmm8 ; SSE-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm1[0],xmm8[1],xmm1[1],xmm8[2],xmm1[2],xmm8[3],xmm1[3] ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] @@ -46,15 +47,14 @@ ; SSE-NEXT: movdqa %xmm3, %xmm4 ; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3] ; SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm6[4],xmm3[5],xmm6[5],xmm3[6],xmm6[6],xmm3[7],xmm6[7] -; SSE-NEXT: movdqa %xmm3, 112(%rdi) -; SSE-NEXT: movdqa 
%xmm4, 96(%rdi) -; SSE-NEXT: movdqa %xmm0, 80(%rdi) -; SSE-NEXT: movdqa %xmm7, 64(%rdi) -; SSE-NEXT: movdqa %xmm2, 48(%rdi) -; SSE-NEXT: movdqa %xmm1, 32(%rdi) -; SSE-NEXT: movdqa %xmm8, 16(%rdi) -; SSE-NEXT: movdqa %xmm5, (%rdi) -; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: movdqa %xmm3, 112(%rax) +; SSE-NEXT: movdqa %xmm4, 96(%rax) +; SSE-NEXT: movdqa %xmm0, 80(%rax) +; SSE-NEXT: movdqa %xmm7, 64(%rax) +; SSE-NEXT: movdqa %xmm2, 48(%rax) +; SSE-NEXT: movdqa %xmm1, 32(%rax) +; SSE-NEXT: movdqa %xmm8, 16(%rax) +; SSE-NEXT: movdqa %xmm5, (%rax) ; SSE-NEXT: retq ; ; AVX1-LABEL: interleave8x8: Index: test/CodeGen/X86/vector-pcmp.ll =================================================================== --- test/CodeGen/X86/vector-pcmp.ll +++ test/CodeGen/X86/vector-pcmp.ll @@ -86,14 +86,14 @@ define <1 x i128> @test_strange_type(<1 x i128> %x) { ; SSE2-LABEL: test_strange_type: ; SSE2: # BB#0: -; SSE2-NEXT: sarq $63, %rsi -; SSE2-NEXT: movq %rsi, %xmm0 -; SSE2-NEXT: notq %rsi +; SSE2-NEXT: movq %rsi, %rdx +; SSE2-NEXT: sarq $63, %rdx +; SSE2-NEXT: movq %rdx, %xmm0 +; SSE2-NEXT: notq %rdx ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm0, %xmm1 ; SSE2-NEXT: movq %xmm1, %rax -; SSE2-NEXT: movq %rsi, %rdx ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_strange_type: Index: test/CodeGen/X86/vector-rotate-128.ll =================================================================== --- test/CodeGen/X86/vector-rotate-128.ll +++ test/CodeGen/X86/vector-rotate-128.ll @@ -351,60 +351,61 @@ ; ; SSE41-LABEL: var_rotate_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] -; SSE41-NEXT: psubw %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psubw %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; 
SSE41-NEXT: movdqa %xmm1, %xmm4 +; SSE41-NEXT: psllw $4, %xmm3 +; SSE41-NEXT: por %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm4 ; SSE41-NEXT: paddw %xmm4, %xmm4 -; SSE41-NEXT: movdqa %xmm3, %xmm6 +; SSE41-NEXT: movdqa %xmm1, %xmm6 ; SSE41-NEXT: psllw $8, %xmm6 -; SSE41-NEXT: movdqa %xmm3, %xmm5 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm6, %xmm5 -; SSE41-NEXT: movdqa %xmm5, %xmm1 -; SSE41-NEXT: psllw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm5, %xmm3 +; SSE41-NEXT: psllw $4, %xmm3 ; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm5 -; SSE41-NEXT: movdqa %xmm5, %xmm1 -; SSE41-NEXT: psllw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm5 +; SSE41-NEXT: movdqa %xmm5, %xmm3 +; SSE41-NEXT: psllw $2, %xmm3 ; SSE41-NEXT: paddw %xmm4, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm5 -; SSE41-NEXT: movdqa %xmm5, %xmm1 -; SSE41-NEXT: psllw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm5 +; SSE41-NEXT: movdqa %xmm5, %xmm3 +; SSE41-NEXT: psllw $1, %xmm3 ; SSE41-NEXT: paddw %xmm4, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm5 +; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm5 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 ; SSE41-NEXT: psllw $4, %xmm2 ; SSE41-NEXT: por %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: paddw %xmm1, %xmm1 -; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psrlw $8, %xmm4 ; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $4, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: movdqa 
%xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $2, %xmm2 -; SSE41-NEXT: paddw %xmm1, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $1, %xmm2 -; SSE41-NEXT: paddw %xmm1, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm3 -; SSE41-NEXT: por %xmm5, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: por %xmm5, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_rotate_v8i16: Index: test/CodeGen/X86/vector-shift-ashr-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-ashr-128.ll +++ test/CodeGen/X86/vector-shift-ashr-128.ll @@ -267,32 +267,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: paddw %xmm3, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psraw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, 
%xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shift-lshr-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-lshr-128.ll +++ test/CodeGen/X86/vector-shift-lshr-128.ll @@ -237,32 +237,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: paddw %xmm3, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psrlw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $2, 
%xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shift-shl-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-shl-128.ll +++ test/CodeGen/X86/vector-shift-shl-128.ll @@ -194,32 +194,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: paddw %xmm3, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psllw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psllw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psllw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psllw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa 
%xmm1, %xmm2 +; SSE41-NEXT: psllw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psllw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psllw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shuffle-128-v2.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v2.ll +++ test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -164,8 +164,8 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) { ; SSE2-LABEL: shuffle_v2f64_22: ; SSE2: # BB#0: -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2f64_22: @@ -193,8 +193,8 @@ define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_32: ; SSE: # BB#0: -; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0] ; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_32: @@ -208,8 +208,8 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_33: ; SSE: # BB#0: -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_33: @@ -316,8 +316,8 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_02_copy: ; SSE: # BB#0: -; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE-NEXT: 
movaps %xmm1, %xmm0 +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_02_copy: @@ -371,26 +371,26 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_03_copy: ; SSE2: # BB#0: -; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_03_copy: ; SSE3: # BB#0: -; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSE3-NEXT: movapd %xmm2, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_03_copy: ; SSSE3: # BB#0: -; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSSE3-NEXT: movapd %xmm2, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_03_copy: ; SSE41: # BB#0: -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: shuffle_v2i64_03_copy: @@ -443,26 +443,26 @@ define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_12_copy: ; SSE2: # BB#0: -; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0] ; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_12_copy: ; SSE3: # BB#0: -; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0] ; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_12_copy: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: 
shuffle_v2i64_12_copy: ; SSE41: # BB#0: -; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_12_copy: @@ -488,8 +488,8 @@ define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_13_copy: ; SSE: # BB#0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_13_copy: @@ -516,8 +516,8 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_20_copy: ; SSE: # BB#0: -; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_20_copy: @@ -568,26 +568,26 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_21_copy: ; SSE2: # BB#0: -; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_21_copy: ; SSE3: # BB#0: -; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_21_copy: ; SSSE3: # BB#0: -; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSSE3-NEXT: movapd %xmm1, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_21_copy: ; SSE41: # BB#0: -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: 
retq ; ; AVX1-LABEL: shuffle_v2i64_21_copy: @@ -640,26 +640,26 @@ define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_30_copy: ; SSE2: # BB#0: -; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] ; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_30_copy: ; SSE3: # BB#0: -; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] ; SSE3-NEXT: movapd %xmm2, %xmm0 +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_30_copy: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_30_copy: ; SSE41: # BB#0: -; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_30_copy: @@ -686,8 +686,8 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_31_copy: ; SSE: # BB#0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_31_copy: Index: test/CodeGen/X86/vector-shuffle-combining-sse4a.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining-sse4a.ll +++ test/CodeGen/X86/vector-shuffle-combining-sse4a.ll @@ -33,8 +33,8 @@ define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) { ; SSSE3-LABEL: combine_insertqi_pshufb_16i8: ; SSSE3: # BB#0: -; SSSE3-NEXT: extrq {{.*#+}} xmm1 = 
xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: retq ; ; SSE42-LABEL: combine_insertqi_pshufb_16i8: @@ -54,8 +54,8 @@ define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) { ; SSSE3-LABEL: combine_insertqi_pshufb_8i16: ; SSSE3: # BB#0: -; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: retq ; ; SSE42-LABEL: combine_insertqi_pshufb_8i16: Index: test/CodeGen/X86/vector-shuffle-combining-ssse3.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -606,8 +606,8 @@ define <16 x i8> @combine_unpckl_arg1_pshufb(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: combine_unpckl_arg1_pshufb: ; SSE: # BB#0: -; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero ; SSE-NEXT: retq ; ; AVX-LABEL: combine_unpckl_arg1_pshufb: Index: test/CodeGen/X86/vector-shuffle-combining.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining.ll +++ test/CodeGen/X86/vector-shuffle-combining.ll @@ -1705,8 +1705,8 @@ define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_test1b: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,0] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,0] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test1b: @@ -1721,8 
+1721,8 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) { ; SSE2-LABEL: combine_test2b: ; SSE2: # BB#0: -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: combine_test2b: @@ -1776,8 +1776,8 @@ define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_test4b: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test4b: @@ -2846,30 +2846,30 @@ define <8 x float> @PR22412(<8 x float> %a, <8 x float> %b) { ; SSE2-LABEL: PR22412: ; SSE2: # BB#0: # %entry +; SSE2-NEXT: movaps %xmm3, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSE2-NEXT: movapd %xmm2, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] -; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,2] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[3,2] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: PR22412: ; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movaps %xmm3, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSSE3-NEXT: movapd %xmm2, %xmm0 -; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] -; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] -; SSSE3-NEXT: movaps %xmm3, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,2] +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[3,2] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: PR22412: ; SSE41: # BB#0: # %entry -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] -; SSE41-NEXT: movapd %xmm0, %xmm1 -; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[3,2] -; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2] -; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] +; 
SSE41-NEXT: movapd %xmm0, %xmm2 +; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[3,2] +; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[3,2] +; SSE41-NEXT: movaps %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: PR22412: Index: test/CodeGen/X86/vector-zext.ll =================================================================== --- test/CodeGen/X86/vector-zext.ll +++ test/CodeGen/X86/vector-zext.ll @@ -2100,6 +2100,7 @@ define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) { ; SSE2-LABEL: zext_32i8_to_32i32: ; SSE2: # BB#0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] @@ -2119,19 +2120,19 @@ ; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE2-NEXT: movdqa %xmm1, 112(%rdi) -; SSE2-NEXT: movdqa %xmm4, 96(%rdi) -; SSE2-NEXT: movdqa %xmm6, 80(%rdi) -; SSE2-NEXT: movdqa %xmm7, 64(%rdi) -; SSE2-NEXT: movdqa %xmm0, 48(%rdi) -; SSE2-NEXT: movdqa %xmm5, 32(%rdi) -; SSE2-NEXT: movdqa %xmm3, 16(%rdi) -; SSE2-NEXT: movdqa %xmm8, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movdqa %xmm1, 112(%rax) +; SSE2-NEXT: movdqa %xmm4, 96(%rax) +; SSE2-NEXT: movdqa %xmm6, 80(%rax) +; SSE2-NEXT: movdqa %xmm7, 64(%rax) +; SSE2-NEXT: movdqa %xmm0, 48(%rax) +; SSE2-NEXT: movdqa %xmm5, 32(%rax) +; SSE2-NEXT: movdqa %xmm3, 16(%rax) +; SSE2-NEXT: movdqa %xmm8, (%rax) ; SSE2-NEXT: retq ; ; SSSE3-LABEL: zext_32i8_to_32i32: ; SSSE3: # BB#0: +; SSSE3-NEXT: movq %rdi, %rax ; SSSE3-NEXT: pxor %xmm2, %xmm2 ; SSSE3-NEXT: movdqa %xmm0, %xmm3 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = 
xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] @@ -2151,19 +2152,19 @@ ; SSSE3-NEXT: movdqa %xmm1, %xmm4 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSSE3-NEXT: movdqa %xmm1, 112(%rdi) -; SSSE3-NEXT: movdqa %xmm4, 96(%rdi) -; SSSE3-NEXT: movdqa %xmm6, 80(%rdi) -; SSSE3-NEXT: movdqa %xmm7, 64(%rdi) -; SSSE3-NEXT: movdqa %xmm0, 48(%rdi) -; SSSE3-NEXT: movdqa %xmm5, 32(%rdi) -; SSSE3-NEXT: movdqa %xmm3, 16(%rdi) -; SSSE3-NEXT: movdqa %xmm8, (%rdi) -; SSSE3-NEXT: movq %rdi, %rax +; SSSE3-NEXT: movdqa %xmm1, 112(%rax) +; SSSE3-NEXT: movdqa %xmm4, 96(%rax) +; SSSE3-NEXT: movdqa %xmm6, 80(%rax) +; SSSE3-NEXT: movdqa %xmm7, 64(%rax) +; SSSE3-NEXT: movdqa %xmm0, 48(%rax) +; SSSE3-NEXT: movdqa %xmm5, 32(%rax) +; SSSE3-NEXT: movdqa %xmm3, 16(%rax) +; SSSE3-NEXT: movdqa %xmm8, (%rax) ; SSSE3-NEXT: retq ; ; SSE41-LABEL: zext_32i8_to_32i32: ; SSE41: # BB#0: +; SSE41-NEXT: movq %rdi, %rax ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3] ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero @@ -2178,15 +2179,14 @@ ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; SSE41-NEXT: movdqa %xmm1, 112(%rdi) -; SSE41-NEXT: movdqa %xmm7, 96(%rdi) -; SSE41-NEXT: movdqa %xmm6, 80(%rdi) -; SSE41-NEXT: movdqa %xmm5, 64(%rdi) -; SSE41-NEXT: movdqa %xmm0, 48(%rdi) -; SSE41-NEXT: movdqa 
%xmm4, 32(%rdi) -; SSE41-NEXT: movdqa %xmm3, 16(%rdi) -; SSE41-NEXT: movdqa %xmm2, (%rdi) -; SSE41-NEXT: movq %rdi, %rax +; SSE41-NEXT: movdqa %xmm1, 112(%rax) +; SSE41-NEXT: movdqa %xmm7, 96(%rax) +; SSE41-NEXT: movdqa %xmm6, 80(%rax) +; SSE41-NEXT: movdqa %xmm5, 64(%rax) +; SSE41-NEXT: movdqa %xmm0, 48(%rax) +; SSE41-NEXT: movdqa %xmm4, 32(%rax) +; SSE41-NEXT: movdqa %xmm3, 16(%rax) +; SSE41-NEXT: movdqa %xmm2, (%rax) ; SSE41-NEXT: retq ; ; AVX1-LABEL: zext_32i8_to_32i32: Index: test/CodeGen/X86/vectorcall.ll =================================================================== --- test/CodeGen/X86/vectorcall.ll +++ test/CodeGen/X86/vectorcall.ll @@ -22,7 +22,8 @@ } ; X86-LABEL: {{^}}test_int_3@@8: ; X64-LABEL: {{^}}test_int_3@@8: -; CHECK: movl %ecx, %eax +; X64: movq %rcx, %rax +; X86: movl %ecx, %eax define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) { %s = add i32 %a, %b @@ -148,8 +149,8 @@ ret <4 x float> %0 } ; CHECK-LABEL: test_mixed_5 -; CHECK: movaps %xmm5, 16(%{{(e|r)}}sp) ; CHECK: movaps %xmm5, %xmm0 +; CHECK: movaps %xmm0, 16(%{{(e|r)}}sp) ; CHECK: ret{{[ql]}} define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, %struct.HVA4* %b) { @@ -183,12 +184,12 @@ ret void } ; CHECK-LABEL: test_mixed_7 -; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, 32(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, 16(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, (%{{rcx|eax}}) ; X64: mov{{[ql]}} %rcx, %rax +; CHECK: movaps %xmm{{[0-9]}}, 64(%{{esp|rsp}}) +; CHECK: movaps %xmm{{[0-9]}}, 48(%{{esp|rsp}}) +; CHECK: movaps %xmm{{[0-9]}}, 32(%{{esp|rsp}}) +; CHECK: movaps %xmm{{[0-9]}}, 16(%{{esp|rsp}}) +; CHECK: movaps %xmm{{[0-9]}}, (%{{esp|rsp}}) ; CHECK: ret{{[ql]}} define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) { Index: test/CodeGen/X86/vselect-minmax.ll 
=================================================================== --- test/CodeGen/X86/vselect-minmax.ll +++ test/CodeGen/X86/vselect-minmax.ll @@ -4830,26 +4830,27 @@ ; ; SSE4-LABEL: test121: ; SSE4: # BB#0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 ; SSE4-NEXT: movdqa %xmm7, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm3, %xmm9 ; SSE4-NEXT: movdqa %xmm6, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm2, %xmm10 -; SSE4-NEXT: movdqa %xmm5, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm5, %xmm7 +; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm9, %xmm11 +; SSE4-NEXT: pcmpgtq %xmm3, %xmm11 +; SSE4-NEXT: pcmpgtq %xmm2, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm1, %xmm5 ; SSE4-NEXT: movdqa %xmm4, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm7 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 ; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm7, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test121: @@ -4970,30 +4971,33 @@ ; ; SSE4-LABEL: test122: ; SSE4: # BB#0: # %entry +; SSE4-NEXT: movdqa %xmm7, %xmm9 +; SSE4-NEXT: movdqa %xmm6, %xmm10 +; SSE4-NEXT: movdqa %xmm5, %xmm11 ; SSE4-NEXT: movdqa %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm3, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm9 +; SSE4-NEXT: movdqa %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm9, %xmm7 ; SSE4-NEXT: pcmpeqd %xmm12, %xmm12 -; SSE4-NEXT: pxor %xmm12, %xmm9 -; SSE4-NEXT: movdqa %xmm2, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm6, %xmm10 -; SSE4-NEXT: pxor 
%xmm12, %xmm10 -; SSE4-NEXT: movdqa %xmm1, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm5, %xmm11 -; SSE4-NEXT: pxor %xmm12, %xmm11 +; SSE4-NEXT: pxor %xmm12, %xmm7 +; SSE4-NEXT: movdqa %xmm2, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm10, %xmm6 +; SSE4-NEXT: pxor %xmm12, %xmm6 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm11, %xmm5 +; SSE4-NEXT: pxor %xmm12, %xmm5 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 ; SSE4-NEXT: pxor %xmm12, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test122: @@ -5105,25 +5109,27 @@ ; ; SSE4-LABEL: test123: ; SSE4: # BB#0: # %entry +; SSE4-NEXT: movdqa %xmm7, %xmm9 +; SSE4-NEXT: movdqa %xmm6, %xmm10 ; SSE4-NEXT: movdqa %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm3, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm9 -; SSE4-NEXT: movdqa %xmm2, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm6, %xmm10 -; SSE4-NEXT: movdqa %xmm1, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm5, %xmm11 +; SSE4-NEXT: movdqa %xmm3, %xmm11 +; SSE4-NEXT: pcmpgtq %xmm9, %xmm11 +; SSE4-NEXT: movdqa %xmm2, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm10, %xmm6 +; SSE4-NEXT: movdqa %xmm1, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm7 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 +; SSE4-NEXT: movdqa %xmm7, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 
-; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm11, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test123: @@ -5245,31 +5251,31 @@ ; ; SSE4-LABEL: test124: ; SSE4: # BB#0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 ; SSE4-NEXT: movdqa %xmm7, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm3, %xmm9 -; SSE4-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE4-NEXT: pxor %xmm0, %xmm9 ; SSE4-NEXT: movdqa %xmm6, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm2, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 ; SSE4-NEXT: movdqa %xmm5, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm1, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 ; SSE4-NEXT: movdqa %xmm4, %xmm12 -; SSE4-NEXT: pcmpgtq %xmm8, %xmm12 -; SSE4-NEXT: pxor %xmm12, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: pcmpgtq %xmm3, %xmm7 +; SSE4-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE4-NEXT: pxor %xmm0, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm2, %xmm6 +; SSE4-NEXT: pxor %xmm0, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm8, %xmm4 +; SSE4-NEXT: pxor %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm12 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: 
movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 +; SSE4-NEXT: movapd %xmm12, %xmm0 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test124: @@ -5381,38 +5387,40 @@ ; ; SSE4-LABEL: test125: ; SSE4: # BB#0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 -; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm3, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 ; SSE4-NEXT: movdqa %xmm7, %xmm9 -; SSE4-NEXT: pxor %xmm0, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm9 -; SSE4-NEXT: movdqa %xmm2, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 ; SSE4-NEXT: movdqa %xmm6, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm11, %xmm10 -; SSE4-NEXT: movdqa %xmm1, %xmm12 -; SSE4-NEXT: pxor %xmm0, %xmm12 ; SSE4-NEXT: movdqa %xmm5, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm12, %xmm11 -; SSE4-NEXT: movdqa %xmm8, %xmm12 -; SSE4-NEXT: pxor %xmm0, %xmm12 -; SSE4-NEXT: pxor %xmm4, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm12, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movdqa %xmm4, %xmm12 +; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm3, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm7 +; SSE4-NEXT: movdqa %xmm2, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm6 +; SSE4-NEXT: 
movdqa %xmm1, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: movdqa %xmm11, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm5 +; SSE4-NEXT: movdqa %xmm8, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: pxor %xmm12, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm12 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 +; SSE4-NEXT: movapd %xmm12, %xmm0 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test125: @@ -5442,7 +5450,7 @@ ; ; AVX2-LABEL: test125: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -5547,43 +5555,47 @@ ; ; SSE4-LABEL: test126: ; SSE4: # BB#0: # %entry +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm10 +; SSE4-NEXT: movdqa %xmm5, %xmm11 +; SSE4-NEXT: movdqa %xmm4, %xmm13 ; SSE4-NEXT: movdqa %xmm0, %xmm9 ; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm7, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: movdqa %xmm3, %xmm8 -; SSE4-NEXT: pxor %xmm0, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm8 +; SSE4-NEXT: movdqa %xmm8, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: movdqa %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm0, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm7 ; SSE4-NEXT: pcmpeqd %xmm12, %xmm12 -; SSE4-NEXT: pxor %xmm12, %xmm8 -; SSE4-NEXT: movdqa %xmm6, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: movdqa %xmm2, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: pcmpgtq 
%xmm11, %xmm10 -; SSE4-NEXT: pxor %xmm12, %xmm10 -; SSE4-NEXT: movdqa %xmm5, %xmm13 -; SSE4-NEXT: pxor %xmm0, %xmm13 -; SSE4-NEXT: movdqa %xmm1, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm13, %xmm11 -; SSE4-NEXT: pxor %xmm12, %xmm11 -; SSE4-NEXT: movdqa %xmm4, %xmm13 -; SSE4-NEXT: pxor %xmm0, %xmm13 +; SSE4-NEXT: pxor %xmm12, %xmm7 +; SSE4-NEXT: movdqa %xmm10, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: movdqa %xmm2, %xmm6 +; SSE4-NEXT: pxor %xmm0, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm6 +; SSE4-NEXT: pxor %xmm12, %xmm6 +; SSE4-NEXT: movdqa %xmm11, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm5 +; SSE4-NEXT: pxor %xmm12, %xmm5 +; SSE4-NEXT: movdqa %xmm13, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 ; SSE4-NEXT: pxor %xmm9, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm13, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 ; SSE4-NEXT: pxor %xmm12, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm13 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 +; SSE4-NEXT: movapd %xmm13, %xmm0 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test126: @@ -5618,7 +5630,7 @@ ; ; AVX2-LABEL: test126: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -5709,38 +5721,42 @@ ; ; SSE4-LABEL: test127: ; SSE4: # BB#0: # %entry +; SSE4-NEXT: movdqa %xmm7, %xmm9 +; SSE4-NEXT: movdqa %xmm6, %xmm10 +; SSE4-NEXT: movdqa %xmm5, %xmm11 +; SSE4-NEXT: movdqa %xmm4, %xmm12 ; SSE4-NEXT: movdqa %xmm0, %xmm8 ; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm7, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: movdqa %xmm3, %xmm9 -; SSE4-NEXT: pxor %xmm0, %xmm9 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm9 -; SSE4-NEXT: movdqa %xmm6, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: movdqa %xmm2, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm11, %xmm10 -; SSE4-NEXT: movdqa %xmm5, %xmm12 -; SSE4-NEXT: pxor %xmm0, %xmm12 -; SSE4-NEXT: movdqa %xmm1, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm12, %xmm11 -; SSE4-NEXT: movdqa %xmm4, %xmm12 -; SSE4-NEXT: pxor %xmm0, %xmm12 +; SSE4-NEXT: movdqa %xmm9, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: movdqa %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm0, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm7 +; SSE4-NEXT: movdqa %xmm10, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: movdqa %xmm2, %xmm6 +; SSE4-NEXT: pxor %xmm0, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm6 +; SSE4-NEXT: movdqa %xmm11, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm5 +; SSE4-NEXT: movdqa %xmm12, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 ; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm12, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm9, %xmm0 -; SSE4-NEXT: 
blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm12 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm9 +; SSE4-NEXT: movapd %xmm12, %xmm0 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm9, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test127: @@ -5770,7 +5786,7 @@ ; ; AVX2-LABEL: test127: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -5876,43 +5892,45 @@ ; ; SSE4-LABEL: test128: ; SSE4: # BB#0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm3, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 ; SSE4-NEXT: movdqa %xmm7, %xmm8 -; SSE4-NEXT: pxor %xmm0, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm8 -; SSE4-NEXT: pcmpeqd %xmm12, %xmm12 -; SSE4-NEXT: pxor %xmm12, %xmm8 -; SSE4-NEXT: movdqa %xmm2, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 ; SSE4-NEXT: movdqa %xmm6, %xmm10 -; SSE4-NEXT: pxor %xmm0, %xmm10 -; SSE4-NEXT: pcmpgtq %xmm11, %xmm10 -; SSE4-NEXT: pxor %xmm12, %xmm10 -; SSE4-NEXT: movdqa %xmm1, %xmm13 -; SSE4-NEXT: pxor %xmm0, %xmm13 ; SSE4-NEXT: movdqa %xmm5, %xmm11 -; SSE4-NEXT: pxor %xmm0, %xmm11 -; SSE4-NEXT: pcmpgtq %xmm13, %xmm11 -; SSE4-NEXT: pxor %xmm12, %xmm11 -; SSE4-NEXT: movdqa %xmm9, %xmm13 -; SSE4-NEXT: pxor %xmm0, %xmm13 -; SSE4-NEXT: pxor %xmm4, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm13, %xmm0 +; 
SSE4-NEXT: movdqa %xmm4, %xmm13 +; SSE4-NEXT: movdqa %xmm0, %xmm9 +; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm3, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm7 +; SSE4-NEXT: pcmpeqd %xmm12, %xmm12 +; SSE4-NEXT: pxor %xmm12, %xmm7 +; SSE4-NEXT: movdqa %xmm2, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm6 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm6 +; SSE4-NEXT: pxor %xmm12, %xmm6 +; SSE4-NEXT: movdqa %xmm1, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: movdqa %xmm11, %xmm5 +; SSE4-NEXT: pxor %xmm0, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm5 +; SSE4-NEXT: pxor %xmm12, %xmm5 +; SSE4-NEXT: movdqa %xmm9, %xmm4 +; SSE4-NEXT: pxor %xmm0, %xmm4 +; SSE4-NEXT: pxor %xmm13, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 ; SSE4-NEXT: pxor %xmm12, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm11, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm13 +; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE4-NEXT: movdqa %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 +; SSE4-NEXT: movapd %xmm13, %xmm0 +; SSE4-NEXT: movapd %xmm11, %xmm1 +; SSE4-NEXT: movapd %xmm10, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test128: @@ -5947,7 +5965,7 @@ ; ; AVX2-LABEL: test128: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; 
AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -8158,7 +8176,7 @@ ; ; AVX2-LABEL: test157: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -8331,7 +8349,7 @@ ; ; AVX2-LABEL: test158: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -8484,7 +8502,7 @@ ; ; AVX2-LABEL: test159: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -8658,7 +8676,7 @@ ; ; AVX2-LABEL: test160: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5 ; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6 ; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5 @@ -9080,7 +9098,7 @@ ; ; AVX2-LABEL: test165: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9181,7 +9199,7 @@ ; ; AVX2-LABEL: test166: ; AVX2: # 
BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9272,7 +9290,7 @@ ; ; AVX2-LABEL: test167: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9373,7 +9391,7 @@ ; ; AVX2-LABEL: test168: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9791,7 +9809,7 @@ ; ; AVX2-LABEL: test173: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9891,7 +9909,7 @@ ; ; AVX2-LABEL: test174: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -9983,7 +10001,7 @@ ; ; AVX2-LABEL: test175: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 @@ -10083,7 +10101,7 @@ ; ; AVX2-LABEL: test176: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 Index: test/CodeGen/X86/vselect.ll =================================================================== --- test/CodeGen/X86/vselect.ll +++ test/CodeGen/X86/vselect.ll @@ -487,25 +487,25 @@ define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) { ; SSE-LABEL: select_illegal: ; SSE: # BB#0: +; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm4 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm5 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm6 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm7 -; SSE-NEXT: movaps %xmm7, 112(%rdi) -; SSE-NEXT: movaps %xmm6, 96(%rdi) -; SSE-NEXT: movaps %xmm5, 80(%rdi) -; SSE-NEXT: movaps %xmm4, 64(%rdi) -; SSE-NEXT: movaps %xmm3, 48(%rdi) -; SSE-NEXT: movaps %xmm2, 32(%rdi) -; SSE-NEXT: movaps %xmm1, 16(%rdi) -; SSE-NEXT: movaps %xmm0, (%rdi) -; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: movaps %xmm7, 112(%rax) +; SSE-NEXT: movaps %xmm6, 96(%rax) +; SSE-NEXT: movaps %xmm5, 80(%rax) +; SSE-NEXT: movaps %xmm4, 64(%rax) +; SSE-NEXT: movaps %xmm3, 48(%rax) +; SSE-NEXT: movaps %xmm2, 32(%rax) +; SSE-NEXT: movaps %xmm1, 16(%rax) +; SSE-NEXT: movaps %xmm0, (%rax) ; SSE-NEXT: retq ; ; AVX-LABEL: select_illegal: ; AVX: # BB#0: -; AVX-NEXT: vmovaps %ymm6, %ymm2 ; AVX-NEXT: vmovaps %ymm7, %ymm3 +; AVX-NEXT: vmovaps %ymm6, %ymm2 ; AVX-NEXT: retq %sel = select <16 x i1> , <16 x double> %a, <16 x double> %b ret <16 x double> %sel Index: test/CodeGen/X86/widen_bitops-0.ll 
=================================================================== --- test/CodeGen/X86/widen_bitops-0.ll +++ test/CodeGen/X86/widen_bitops-0.ll @@ -15,8 +15,8 @@ ; ; X64-SSE-LABEL: and_i24_as_v3i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -34,8 +34,8 @@ ; ; X64-SSE-LABEL: xor_i24_as_v3i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -53,8 +53,8 @@ ; ; X64-SSE-LABEL: or_i24_as_v3i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -76,8 +76,8 @@ ; ; X64-SSE-LABEL: and_i24_as_v8i3: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -95,8 +95,8 @@ ; ; X64-SSE-LABEL: xor_i24_as_v8i3: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -114,8 +114,8 @@ ; ; X64-SSE-LABEL: or_i24_as_v8i3: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> Index: test/CodeGen/X86/widen_bitops-1.ll =================================================================== --- test/CodeGen/X86/widen_bitops-1.ll +++ test/CodeGen/X86/widen_bitops-1.ll @@ -15,8 +15,8 @@ ; ; X64-SSE-LABEL: and_i32_as_v4i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, 
%eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -34,8 +34,8 @@ ; ; X64-SSE-LABEL: xor_i32_as_v4i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -53,8 +53,8 @@ ; ; X64-SSE-LABEL: or_i32_as_v4i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -76,8 +76,8 @@ ; ; X64-SSE-LABEL: and_i32_as_v8i4: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -95,8 +95,8 @@ ; ; X64-SSE-LABEL: xor_i32_as_v8i4: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -114,8 +114,8 @@ ; ; X64-SSE-LABEL: or_i32_as_v8i4: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> Index: test/CodeGen/X86/widen_load-2.ll =================================================================== --- test/CodeGen/X86/widen_load-2.ll +++ test/CodeGen/X86/widen_load-2.ll @@ -21,11 +21,11 @@ ; ; X64-LABEL: add3i32: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: paddd (%rdx), %xmm0 -; X64-NEXT: pextrd $2, %xmm0, 8(%rdi) -; X64-NEXT: movq %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrd $2, %xmm0, 8(%rax) +; X64-NEXT: movq %xmm0, (%rax) ; X64-NEXT: retq %a = load %i32vec3, %i32vec3* %ap, align 16 %b = load %i32vec3, %i32vec3* 
%bp, align 16 @@ -54,14 +54,14 @@ ; ; X64-LABEL: add3i32_2: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: pinsrd $2, 8(%rsi), %xmm0 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: pinsrd $2, 8(%rdx), %xmm1 ; X64-NEXT: paddd %xmm0, %xmm1 -; X64-NEXT: pextrd $2, %xmm1, 8(%rdi) -; X64-NEXT: movq %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrd $2, %xmm1, 8(%rax) +; X64-NEXT: movq %xmm1, (%rax) ; X64-NEXT: retq %a = load %i32vec3, %i32vec3* %ap, align 8 %b = load %i32vec3, %i32vec3* %bp, align 8 @@ -89,14 +89,14 @@ ; ; X64-LABEL: add7i32: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddd (%rdx), %xmm0 ; X64-NEXT: paddd 16(%rdx), %xmm1 -; X64-NEXT: pextrd $2, %xmm1, 24(%rdi) -; X64-NEXT: movq %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrd $2, %xmm1, 24(%rax) +; X64-NEXT: movq %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i32vec7, %i32vec7* %ap, align 16 %b = load %i32vec7, %i32vec7* %bp, align 16 @@ -125,16 +125,16 @@ ; ; X64-LABEL: add12i32: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: movdqa 32(%rsi), %xmm2 ; X64-NEXT: paddd (%rdx), %xmm0 ; X64-NEXT: paddd 16(%rdx), %xmm1 ; X64-NEXT: paddd 32(%rdx), %xmm2 -; X64-NEXT: movdqa %xmm2, 32(%rdi) -; X64-NEXT: movdqa %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movdqa %xmm2, 32(%rax) +; X64-NEXT: movdqa %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i32vec12, %i32vec12* %ap, align 16 %b = load %i32vec12, %i32vec12* %bp, align 16 @@ -171,13 +171,13 @@ ; ; X64-LABEL: add3i16: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; X64-NEXT: pmovzxwd {{.*#+}} 
xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; X64-NEXT: paddd %xmm0, %xmm1 -; X64-NEXT: pextrw $4, %xmm1, 4(%rdi) +; X64-NEXT: pextrw $4, %xmm1, 4(%rax) ; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; X64-NEXT: movd %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movd %xmm1, (%rax) ; X64-NEXT: retq %a = load %i16vec3, %i16vec3* %ap, align 16 %b = load %i16vec3, %i16vec3* %bp, align 16 @@ -201,11 +201,11 @@ ; ; X64-LABEL: add4i16: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: paddw %xmm0, %xmm1 -; X64-NEXT: movq %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %xmm1, (%rax) ; X64-NEXT: retq %a = load %i16vec4, %i16vec4* %ap, align 16 %b = load %i16vec4, %i16vec4* %bp, align 16 @@ -232,13 +232,13 @@ ; ; X64-LABEL: add12i16: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddw (%rdx), %xmm0 ; X64-NEXT: paddw 16(%rdx), %xmm1 -; X64-NEXT: movq %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i16vec12, %i16vec12* %ap, align 16 %b = load %i16vec12, %i16vec12* %bp, align 16 @@ -267,16 +267,16 @@ ; ; X64-LABEL: add18i16: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: movdqa 32(%rsi), %xmm2 ; X64-NEXT: paddw (%rdx), %xmm0 ; X64-NEXT: paddw 16(%rdx), %xmm1 ; X64-NEXT: paddw 32(%rdx), %xmm2 -; X64-NEXT: movd %xmm2, 32(%rdi) -; X64-NEXT: movdqa %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movd %xmm2, 32(%rax) +; X64-NEXT: movdqa %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i16vec18, %i16vec18* %ap, align 16 %b = load %i16vec18, %i16vec18* %bp, align 16 @@ 
-305,13 +305,13 @@ ; ; X64-LABEL: add3i8: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; X64-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; X64-NEXT: paddd %xmm0, %xmm1 -; X64-NEXT: pextrb $8, %xmm1, 2(%rdi) +; X64-NEXT: pextrb $8, %xmm1, 2(%rax) ; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-NEXT: pextrw $0, %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrw $0, %xmm1, (%rax) ; X64-NEXT: retq %a = load %i8vec3, %i8vec3* %ap, align 16 %b = load %i8vec3, %i8vec3* %bp, align 16 @@ -341,16 +341,16 @@ ; ; X64-LABEL: add31i8: ; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddb (%rdx), %xmm0 ; X64-NEXT: paddb 16(%rdx), %xmm1 -; X64-NEXT: pextrb $14, %xmm1, 30(%rdi) -; X64-NEXT: pextrw $6, %xmm1, 28(%rdi) -; X64-NEXT: pextrd $2, %xmm1, 24(%rdi) -; X64-NEXT: movq %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrb $14, %xmm1, 30(%rax) +; X64-NEXT: pextrw $6, %xmm1, 28(%rax) +; X64-NEXT: pextrd $2, %xmm1, 24(%rax) +; X64-NEXT: movq %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i8vec31, %i8vec31* %ap, align 16 %b = load %i8vec31, %i8vec31* %bp, align 16 @@ -386,6 +386,7 @@ ; ; X64-LABEL: rot: ; X64: # BB#0: # %entry +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa {{.*#+}} xmm0 = [40606,158] ; X64-NEXT: pextrw $0, %xmm0, (%rsi) ; X64-NEXT: movb $-98, 2(%rsi) @@ -397,9 +398,8 @@ ; X64-NEXT: psrld $1, %xmm1 ; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] ; X64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-NEXT: pextrb $8, %xmm1, 2(%rdi) -; X64-NEXT: pextrw $0, %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrb $8, %xmm1, 2(%rax) 
+; X64-NEXT: pextrw $0, %xmm0, (%rax) ; X64-NEXT: retq entry: %storetmp = bitcast %i8vec3pack* %X to <3 x i8>* Index: test/CodeGen/X86/widen_load-3.ll =================================================================== --- test/CodeGen/X86/widen_load-3.ll +++ test/CodeGen/X86/widen_load-3.ll @@ -55,26 +55,26 @@ ; ; X64-SSE-LABEL: load7_aligned: ; X64-SSE: # BB#0: +; X64-SSE-NEXT: movq %rdi, %rax ; X64-SSE-NEXT: movaps (%rsi), %xmm0 ; X64-SSE-NEXT: movaps 16(%rsi), %xmm1 ; X64-SSE-NEXT: movaps 32(%rsi), %xmm2 -; X64-SSE-NEXT: movq 48(%rsi), %rax -; X64-SSE-NEXT: movq %rax, 48(%rdi) -; X64-SSE-NEXT: movaps %xmm2, 32(%rdi) -; X64-SSE-NEXT: movaps %xmm1, 16(%rdi) -; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: movq %rdi, %rax +; X64-SSE-NEXT: movq 48(%rsi), %rcx +; X64-SSE-NEXT: movq %rcx, 48(%rax) +; X64-SSE-NEXT: movaps %xmm2, 32(%rax) +; X64-SSE-NEXT: movaps %xmm1, 16(%rax) +; X64-SSE-NEXT: movaps %xmm0, (%rax) ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: load7_aligned: ; X64-AVX: # BB#0: +; X64-AVX-NEXT: movq %rdi, %rax ; X64-AVX-NEXT: vmovaps (%rsi), %ymm0 ; X64-AVX-NEXT: vmovaps 32(%rsi), %ymm1 -; X64-AVX-NEXT: vmovaps %ymm0, (%rdi) +; X64-AVX-NEXT: vmovaps %ymm0, (%rax) ; X64-AVX-NEXT: vextractf128 $1, %ymm1, %xmm0 -; X64-AVX-NEXT: vmovlps %xmm0, 48(%rdi) -; X64-AVX-NEXT: vmovaps %xmm1, 32(%rdi) -; X64-AVX-NEXT: movq %rdi, %rax +; X64-AVX-NEXT: vmovlps %xmm0, 48(%rax) +; X64-AVX-NEXT: vmovaps %xmm1, 32(%rax) ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq %x1 = load <7 x i64>, <7 x i64>* %x @@ -115,26 +115,26 @@ ; ; X64-SSE-LABEL: load7_unaligned: ; X64-SSE: # BB#0: +; X64-SSE-NEXT: movq %rdi, %rax ; X64-SSE-NEXT: movups (%rsi), %xmm0 ; X64-SSE-NEXT: movups 16(%rsi), %xmm1 ; X64-SSE-NEXT: movups 32(%rsi), %xmm2 -; X64-SSE-NEXT: movq 48(%rsi), %rax -; X64-SSE-NEXT: movq %rax, 48(%rdi) -; X64-SSE-NEXT: movaps %xmm2, 32(%rdi) -; X64-SSE-NEXT: movaps %xmm1, 16(%rdi) -; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: movq %rdi, %rax +; X64-SSE-NEXT: movq 
48(%rsi), %rcx +; X64-SSE-NEXT: movq %rcx, 48(%rax) +; X64-SSE-NEXT: movaps %xmm2, 32(%rax) +; X64-SSE-NEXT: movaps %xmm1, 16(%rax) +; X64-SSE-NEXT: movaps %xmm0, (%rax) ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: load7_unaligned: ; X64-AVX: # BB#0: +; X64-AVX-NEXT: movq %rdi, %rax ; X64-AVX-NEXT: vmovups (%rsi), %ymm0 ; X64-AVX-NEXT: vmovups 32(%rsi), %xmm1 -; X64-AVX-NEXT: movq 48(%rsi), %rax -; X64-AVX-NEXT: movq %rax, 48(%rdi) -; X64-AVX-NEXT: vmovaps %xmm1, 32(%rdi) -; X64-AVX-NEXT: vmovaps %ymm0, (%rdi) -; X64-AVX-NEXT: movq %rdi, %rax +; X64-AVX-NEXT: movq 48(%rsi), %rcx +; X64-AVX-NEXT: movq %rcx, 48(%rax) +; X64-AVX-NEXT: vmovaps %xmm1, 32(%rax) +; X64-AVX-NEXT: vmovaps %ymm0, (%rax) ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq %x1 = load <7 x i64>, <7 x i64>* %x, align 1 Index: test/CodeGen/X86/win64_vararg.ll =================================================================== --- test/CodeGen/X86/win64_vararg.ll +++ test/CodeGen/X86/win64_vararg.ll @@ -121,10 +121,10 @@ } ; CHECK-LABEL: sret_arg: ; CHECK: pushq +; CHECK: movq %rcx, %rax ; CHECK-DAG: movq %r9, 40(%rsp) ; CHECK-DAG: movq %r8, 32(%rsp) ; CHECK: movl 32(%rsp), %[[tmp:[^ ]*]] -; CHECK: movl %[[tmp]], (%[[sret:[^ ]*]]) -; CHECK: movq %[[sret]], %rax +; CHECK: movl %[[tmp]], (%rax) ; CHECK: popq ; CHECK: retq Index: test/CodeGen/X86/x86-cmov-converter.ll =================================================================== --- test/CodeGen/X86/x86-cmov-converter.ll +++ test/CodeGen/X86/x86-cmov-converter.ll @@ -336,14 +336,14 @@ ; CHECK-LABEL: test_cmov_memoperand: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %load = load i32, i32* %y %z = select i1 %cond, i32 %x, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%rcx), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], % ret i32 %z } @@ -353,6 +353,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax 
; CHECK: cmpl %y = load i32, i32* %y.ptr %z1 = select i1 %cond, i32 %x, i32 %a @@ -362,17 +363,16 @@ ; CHECK: ja [[FALSE_BB:.*]] ; CHECK-DAG: movl %{{.*}}, %[[R1:.*]] ; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]] -; CHECK-DAG: movl %{{.*}} %[[R3:.*]] +; CHECK-DAG: movl %{{.*}} %{{.*}} ; CHECK: [[FALSE_BB]]: ; CHECK: addl ; CHECK-DAG: %[[R1]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] +; CHECK-DAG: %eax ; CHECK-DAG: addl ; CHECK-DAG: %[[R2]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] -; CHECK: movl %[[R3]], %eax +; CHECK-DAG: %eax ; CHECK: retq %s1 = add i32 %z1, %z2 %s2 = add i32 %s1, %z3 @@ -384,6 +384,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group2: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %y = load i32, i32* %y.ptr %z2 = select i1 %cond, i32 %a, i32 %x @@ -398,12 +399,11 @@ ; CHECK: addl ; CHECK-DAG: %[[R1]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] +; CHECK-DAG: %eax ; CHECK-DAG: addl ; CHECK-DAG: %[[R2]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] -; CHECK: movl %[[R3]], %eax +; CHECK-DAG: %eax ; CHECK: retq %s1 = add i32 %z1, %z2 %s2 = add i32 %s1, %z3 @@ -434,15 +434,15 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %p = select i1 %cond, i32* %x, i32* %y %load = load i32, i32* %p %z = select i1 %cond, i32 %a, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%r{{..}}), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], %eax ; CHECK: retq ret i32 %z } @@ -453,6 +453,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %load1 = load i32*, i32** %y %p = select i1 %cond, i32* %x, i32* %load1 @@ -461,9 +462,8 @@ ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] ; CHECK: movq (%r{{..}}), %[[R1:.*]] -; CHECK: movl (%[[R1]]), %[[R2:.*]] +; CHECK: movl (%[[R1]]), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R2]], %eax ; CHECK: 
retq ret i32 %z } @@ -475,6 +475,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %p = select i1 %cond, i32* %x, i32* %y %p2 = select i1 %cond, i32* %z, i32* %p @@ -482,9 +483,8 @@ %r = select i1 %cond, i32 %a, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%r{{..}}), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], %eax ; CHECK: retq ret i32 %r } Index: test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/x86-shrink-wrapping.ll +++ test/CodeGen/X86/x86-shrink-wrapping.ll @@ -70,6 +70,7 @@ ; Check that we do not perform the restore inside the loop whereas the save ; is outside. ; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: +; CHECK-DAG: movl %esi, %eax ; ; Shrink-wrapping allows to skip the prologue in the else case. ; ENABLE: testl %edi, %edi @@ -77,14 +78,14 @@ ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx +; CHECK-DAG: pushq %rbx ; ; DISABLE: testl %edi, %edi ; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; SUM is in %esi because it is coalesced with the second ; argument on the else path. -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; Next BB. @@ -98,23 +99,20 @@ ; SUM << 3. ; CHECK: shll $3, [[SUM]] ; -; Jump to epilogue. -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq %rbx +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. 
-; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { entry: @@ -197,19 +195,20 @@ ; Check with a more complex case that we do not have save within the loop and ; restore outside. ; CHECK-LABEL: loopInfoSaveOutsideLoop: +; CHECK-DAG: movl %esi, %eax ; ; ENABLE: testl %edi, %edi ; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx +; CHECK-DAG: pushq %rbx ; ; DISABLE: testl %edi, %edi ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: nop -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body @@ -221,22 +220,20 @@ ; CHECK: nop ; CHECK: shll $3, [[SUM]] ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq %rbx +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { entry: @@ -273,19 +270,20 @@ ; Check with a more complex case that we do not have restore within the loop and ; save outside. ; CHECK-LABEL: loopInfoRestoreOutsideLoop: +; CHECK-DAG: movl %esi, %eax ; ; ENABLE: testl %edi, %edi ; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: rbx. 
-; CHECK: pushq %rbx +; CHECK-DAG: pushq %rbx ; ; DISABLE: testl %edi, %edi ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: nop -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body @@ -296,23 +294,21 @@ ; Next BB. ; CHECK: shll $3, [[SUM]] ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq %rbx +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { entry: @@ -357,13 +353,14 @@ ; Check that we handle inline asm correctly. ; CHECK-LABEL: inlineAsm: +; CHECK-DAG: movl %esi, %eax ; ; ENABLE: testl %edi, %edi ; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx +; CHECK-DAG: pushq %rbx ; ; DISABLE: testl %edi, %edi ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] @@ -378,24 +375,22 @@ ; CHECK-NEXT: jne [[LOOP_LABEL]] ; Next BB. ; CHECK: nop -; CHECK: xorl %esi, %esi +; CHECK: xorl %eax, %eax ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq %rbx +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; DISABLE: addl %eax, %eax ; ; Epilogue code. 
; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @inlineAsm(i32 %cond, i32 %N) { entry: Index: test/CodeGen/X86/xaluo.ll =================================================================== --- test/CodeGen/X86/xaluo.ll +++ test/CodeGen/X86/xaluo.ll @@ -719,26 +719,26 @@ define i32 @saddoselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: saddoselecti32: ; SDAG: ## BB#0: -; SDAG-NEXT: movl %edi, %eax -; SDAG-NEXT: addl %esi, %eax -; SDAG-NEXT: cmovol %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: movl %edi, %ecx +; SDAG-NEXT: addl %eax, %ecx +; SDAG-NEXT: cmovol %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: saddoselecti32: ; FAST: ## BB#0: -; FAST-NEXT: movl %edi, %eax -; FAST-NEXT: addl %esi, %eax -; FAST-NEXT: cmovol %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: movl %edi, %ecx +; FAST-NEXT: addl %eax, %ecx +; FAST-NEXT: cmovol %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: saddoselecti32: ; KNL: ## BB#0: -; KNL-NEXT: movl %edi, %eax -; KNL-NEXT: addl %esi, %eax -; KNL-NEXT: cmovol %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: addl %eax, %ecx +; KNL-NEXT: cmovol %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -749,26 +749,26 @@ define i64 @saddoselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: saddoselecti64: ; SDAG: ## BB#0: -; SDAG-NEXT: movq %rdi, %rax -; SDAG-NEXT: addq %rsi, %rax -; SDAG-NEXT: cmovoq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: movq %rdi, %rcx +; SDAG-NEXT: addq %rax, %rcx +; SDAG-NEXT: cmovoq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: saddoselecti64: ; FAST: ## BB#0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: addq %rsi, %rax -; FAST-NEXT: cmovoq %rdi, %rsi ; FAST-NEXT: movq 
%rsi, %rax +; FAST-NEXT: movq %rdi, %rcx +; FAST-NEXT: addq %rax, %rcx +; FAST-NEXT: cmovoq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: saddoselecti64: ; KNL: ## BB#0: -; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: addq %rsi, %rax -; KNL-NEXT: cmovoq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: movq %rdi, %rcx +; KNL-NEXT: addq %rax, %rcx +; KNL-NEXT: cmovoq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -779,26 +779,26 @@ define i32 @uaddoselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: uaddoselecti32: ; SDAG: ## BB#0: -; SDAG-NEXT: movl %edi, %eax -; SDAG-NEXT: addl %esi, %eax -; SDAG-NEXT: cmovbl %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: movl %edi, %ecx +; SDAG-NEXT: addl %eax, %ecx +; SDAG-NEXT: cmovbl %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: uaddoselecti32: ; FAST: ## BB#0: -; FAST-NEXT: movl %edi, %eax -; FAST-NEXT: addl %esi, %eax -; FAST-NEXT: cmovbl %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: movl %edi, %ecx +; FAST-NEXT: addl %eax, %ecx +; FAST-NEXT: cmovbl %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: uaddoselecti32: ; KNL: ## BB#0: -; KNL-NEXT: movl %edi, %eax -; KNL-NEXT: addl %esi, %eax -; KNL-NEXT: cmovbl %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: addl %eax, %ecx +; KNL-NEXT: cmovbl %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -809,26 +809,26 @@ define i64 @uaddoselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: uaddoselecti64: ; SDAG: ## BB#0: -; SDAG-NEXT: movq %rdi, %rax -; SDAG-NEXT: addq %rsi, %rax -; SDAG-NEXT: cmovbq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: movq %rdi, %rcx +; SDAG-NEXT: addq %rax, %rcx +; SDAG-NEXT: cmovbq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: uaddoselecti64: ; FAST: ## BB#0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: addq %rsi, %rax -; FAST-NEXT: cmovbq %rdi, %rsi ; FAST-NEXT: 
movq %rsi, %rax +; FAST-NEXT: movq %rdi, %rcx +; FAST-NEXT: addq %rax, %rcx +; FAST-NEXT: cmovbq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: uaddoselecti64: ; KNL: ## BB#0: -; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: addq %rsi, %rax -; KNL-NEXT: cmovbq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: movq %rdi, %rcx +; KNL-NEXT: addq %rax, %rcx +; KNL-NEXT: cmovbq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -839,23 +839,23 @@ define i32 @ssuboselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: ssuboselecti32: ; SDAG: ## BB#0: -; SDAG-NEXT: cmpl %esi, %edi -; SDAG-NEXT: cmovol %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: cmpl %eax, %edi +; SDAG-NEXT: cmovol %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: ssuboselecti32: ; FAST: ## BB#0: -; FAST-NEXT: cmpl %esi, %edi -; FAST-NEXT: cmovol %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: cmpl %eax, %edi +; FAST-NEXT: cmovol %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: ssuboselecti32: ; KNL: ## BB#0: -; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: cmovol %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: cmpl %eax, %edi +; KNL-NEXT: cmovol %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -866,23 +866,23 @@ define i64 @ssuboselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: ssuboselecti64: ; SDAG: ## BB#0: -; SDAG-NEXT: cmpq %rsi, %rdi -; SDAG-NEXT: cmovoq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: cmpq %rax, %rdi +; SDAG-NEXT: cmovoq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: ssuboselecti64: ; FAST: ## BB#0: -; FAST-NEXT: cmpq %rsi, %rdi -; FAST-NEXT: cmovoq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: cmpq %rax, %rdi +; FAST-NEXT: cmovoq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: ssuboselecti64: ; KNL: ## BB#0: -; KNL-NEXT: cmpq %rsi, %rdi -; KNL-NEXT: cmovoq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: cmpq %rax, %rdi +; 
KNL-NEXT: cmovoq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -893,23 +893,23 @@ define i32 @usuboselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: usuboselecti32: ; SDAG: ## BB#0: -; SDAG-NEXT: cmpl %esi, %edi -; SDAG-NEXT: cmovbl %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: cmpl %eax, %edi +; SDAG-NEXT: cmovbl %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: usuboselecti32: ; FAST: ## BB#0: -; FAST-NEXT: cmpl %esi, %edi -; FAST-NEXT: cmovbl %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: cmpl %eax, %edi +; FAST-NEXT: cmovbl %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: usuboselecti32: ; KNL: ## BB#0: -; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: cmovbl %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: cmpl %eax, %edi +; KNL-NEXT: cmovbl %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -920,23 +920,23 @@ define i64 @usuboselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: usuboselecti64: ; SDAG: ## BB#0: -; SDAG-NEXT: cmpq %rsi, %rdi -; SDAG-NEXT: cmovbq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: cmpq %rax, %rdi +; SDAG-NEXT: cmovbq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: usuboselecti64: ; FAST: ## BB#0: -; FAST-NEXT: cmpq %rsi, %rdi -; FAST-NEXT: cmovbq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: cmpq %rax, %rdi +; FAST-NEXT: cmovbq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: usuboselecti64: ; KNL: ## BB#0: -; KNL-NEXT: cmpq %rsi, %rdi -; KNL-NEXT: cmovbq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: cmpq %rax, %rdi +; KNL-NEXT: cmovbq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1372,23 +1372,23 @@ define {i64, i1} @usuboovf(i64 %a, i64 %b) { ; SDAG-LABEL: usuboovf: ; SDAG: ## BB#0: -; SDAG-NEXT: notq %rsi -; SDAG-NEXT: xorl %edx, %edx ; SDAG-NEXT: movq %rsi, %rax +; 
SDAG-NEXT: notq %rax +; SDAG-NEXT: xorl %edx, %edx ; SDAG-NEXT: retq ; ; FAST-LABEL: usuboovf: ; FAST: ## BB#0: -; FAST-NEXT: notq %rsi -; FAST-NEXT: xorl %edx, %edx ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: notq %rax +; FAST-NEXT: xorl %edx, %edx ; FAST-NEXT: retq ; ; KNL-LABEL: usuboovf: ; KNL: ## BB#0: -; KNL-NEXT: notq %rsi -; KNL-NEXT: xorl %edx, %edx ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: notq %rax +; KNL-NEXT: xorl %edx, %edx ; KNL-NEXT: retq %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %a) %v0 = extractvalue {i64, i1} %t0, 0 Index: test/CodeGen/X86/xchg-nofold.ll =================================================================== --- test/CodeGen/X86/xchg-nofold.ll +++ test/CodeGen/X86/xchg-nofold.ll @@ -9,20 +9,21 @@ define zeroext i1 @_Z3fooRSt6atomicIbEb(%"struct.std::atomic"* nocapture dereferenceable(1) %a, i1 returned zeroext %b) nounwind { ; CHECK-LABEL: _Z3fooRSt6atomicIbEb: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shrq $3, %rax -; CHECK-NEXT: movb 2147450880(%rax), %al -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: shrq $3, %rcx +; CHECK-NEXT: movb 2147450880(%rcx), %cl +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # BB#1: -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: andl $7, %ecx -; CHECK-NEXT: cmpb %al, %cl +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: andl $7, %edx +; CHECK-NEXT: cmpb %cl, %dl ; CHECK-NEXT: jge .LBB0_2 ; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: xchgb %al, (%rdi) -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xchgb %cl, (%rdi) +; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: pushq %rax Index: test/CodeGen/X86/xmulo.ll =================================================================== --- test/CodeGen/X86/xmulo.ll +++ test/CodeGen/X86/xmulo.ll @@ -92,6 +92,7 @@ ; SDAG-LABEL: smuloi8: ; SDAG: ## BB#0: 
; SDAG-NEXT: movl %edi, %eax +; SDAG-NEXT: ## kill: %AL %AL %EAX ; SDAG-NEXT: imulb %sil ; SDAG-NEXT: seto %cl ; SDAG-NEXT: movb %al, (%rdx) @@ -101,6 +102,7 @@ ; FAST-LABEL: smuloi8: ; FAST: ## BB#0: ; FAST-NEXT: movl %edi, %eax +; FAST-NEXT: ## kill: %AL %AL %EAX ; FAST-NEXT: imulb %sil ; FAST-NEXT: seto %cl ; FAST-NEXT: movb %al, (%rdx) @@ -111,6 +113,7 @@ ; KNL-LABEL: smuloi8: ; KNL: ## BB#0: ; KNL-NEXT: movl %edi, %eax +; KNL-NEXT: ## kill: %AL %AL %EAX ; KNL-NEXT: imulb %sil ; KNL-NEXT: seto %cl ; KNL-NEXT: movb %al, (%rdx) @@ -218,6 +221,7 @@ ; SDAG-LABEL: umuloi8: ; SDAG: ## BB#0: ; SDAG-NEXT: movl %edi, %eax +; SDAG-NEXT: ## kill: %AL %AL %EAX ; SDAG-NEXT: mulb %sil ; SDAG-NEXT: seto %cl ; SDAG-NEXT: movb %al, (%rdx) @@ -227,6 +231,7 @@ ; FAST-LABEL: umuloi8: ; FAST: ## BB#0: ; FAST-NEXT: movl %edi, %eax +; FAST-NEXT: ## kill: %AL %AL %EAX ; FAST-NEXT: mulb %sil ; FAST-NEXT: seto %cl ; FAST-NEXT: movb %al, (%rdx) @@ -237,6 +242,7 @@ ; KNL-LABEL: umuloi8: ; KNL: ## BB#0: ; KNL-NEXT: movl %edi, %eax +; KNL-NEXT: ## kill: %AL %AL %EAX ; KNL-NEXT: mulb %sil ; KNL-NEXT: seto %cl ; KNL-NEXT: movb %al, (%rdx) @@ -254,6 +260,7 @@ ; SDAG: ## BB#0: ; SDAG-NEXT: movq %rdx, %rcx ; SDAG-NEXT: movl %edi, %eax +; SDAG-NEXT: ## kill: %AX %AX %EAX ; SDAG-NEXT: mulw %si ; SDAG-NEXT: seto %dl ; SDAG-NEXT: movw %ax, (%rcx) @@ -264,6 +271,7 @@ ; FAST: ## BB#0: ; FAST-NEXT: movq %rdx, %rcx ; FAST-NEXT: movl %edi, %eax +; FAST-NEXT: ## kill: %AX %AX %EAX ; FAST-NEXT: mulw %si ; FAST-NEXT: seto %dl ; FAST-NEXT: movw %ax, (%rcx) @@ -275,6 +283,7 @@ ; KNL: ## BB#0: ; KNL-NEXT: movq %rdx, %rcx ; KNL-NEXT: movl %edi, %eax +; KNL-NEXT: ## kill: %AX %AX %EAX ; KNL-NEXT: mulw %si ; KNL-NEXT: seto %dl ; KNL-NEXT: movw %ax, (%rcx) @@ -369,26 +378,26 @@ define i32 @smuloselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: smuloselecti32: ; SDAG: ## BB#0: -; SDAG-NEXT: movl %edi, %eax -; SDAG-NEXT: imull %esi, %eax -; SDAG-NEXT: cmovol %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: movl %edi, 
%ecx +; SDAG-NEXT: imull %eax, %ecx +; SDAG-NEXT: cmovol %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: smuloselecti32: ; FAST: ## BB#0: -; FAST-NEXT: movl %edi, %eax -; FAST-NEXT: imull %esi, %eax -; FAST-NEXT: cmovol %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: movl %edi, %ecx +; FAST-NEXT: imull %eax, %ecx +; FAST-NEXT: cmovol %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: smuloselecti32: ; KNL: ## BB#0: -; KNL-NEXT: movl %edi, %eax -; KNL-NEXT: imull %esi, %eax -; KNL-NEXT: cmovol %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: imull %eax, %ecx +; KNL-NEXT: cmovol %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -399,26 +408,26 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: smuloselecti64: ; SDAG: ## BB#0: -; SDAG-NEXT: movq %rdi, %rax -; SDAG-NEXT: imulq %rsi, %rax -; SDAG-NEXT: cmovoq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: movq %rdi, %rcx +; SDAG-NEXT: imulq %rax, %rcx +; SDAG-NEXT: cmovoq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: smuloselecti64: ; FAST: ## BB#0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: imulq %rsi, %rax -; FAST-NEXT: cmovoq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: movq %rdi, %rcx +; FAST-NEXT: imulq %rax, %rcx +; FAST-NEXT: cmovoq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: smuloselecti64: ; KNL: ## BB#0: -; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: imulq %rsi, %rax -; KNL-NEXT: cmovoq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: movq %rdi, %rcx +; KNL-NEXT: imulq %rax, %rcx +; KNL-NEXT: cmovoq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -694,8 +703,8 @@ define i1 @bug27873(i64 %c1, i1 %c2) { ; SDAG-LABEL: bug27873: ; SDAG: ## BB#0: -; SDAG-NEXT: movl $160, %ecx ; SDAG-NEXT: movq %rdi, %rax +; SDAG-NEXT: movl $160, %ecx ; SDAG-NEXT: mulq %rcx ; SDAG-NEXT: seto %al ; SDAG-NEXT: 
orb %sil, %al @@ -703,8 +712,8 @@ ; ; FAST-LABEL: bug27873: ; FAST: ## BB#0: -; FAST-NEXT: movl $160, %ecx ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: movl $160, %ecx ; FAST-NEXT: mulq %rcx ; FAST-NEXT: seto %al ; FAST-NEXT: orb %sil, %al @@ -712,8 +721,8 @@ ; ; KNL-LABEL: bug27873: ; KNL: ## BB#0: -; KNL-NEXT: movl $160, %ecx ; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: movl $160, %ecx ; KNL-NEXT: mulq %rcx ; KNL-NEXT: seto %al ; KNL-NEXT: orb %sil, %al Index: test/CodeGen/XCore/byVal.ll =================================================================== --- test/CodeGen/XCore/byVal.ll +++ test/CodeGen/XCore/byVal.ll @@ -38,13 +38,13 @@ ; CHECK-LABEL: f2Test ; CHECK: extsp 4 ; CHECK: stw lr, sp[1] +; CHECK: mov r11, r1 ; CHECK: stw r2, sp[3] ; CHECK: stw r3, sp[4] ; CHECK: ldw r0, r0[0] ; CHECK: stw r0, sp[2] -; CHECK: ldaw r2, sp[2] -; CHECK: mov r0, r1 -; CHECK: mov r1, r2 +; CHECK: ldaw r1, sp[2] +; CHECK: mov r0, r11 ; CHECK: bl f2 ; CHECK: ldw lr, sp[1] ; CHECK: ldaw sp, sp[4]