Index: include/llvm/CodeGen/MachineRegisterInfo.h
===================================================================
--- include/llvm/CodeGen/MachineRegisterInfo.h
+++ include/llvm/CodeGen/MachineRegisterInfo.h
@@ -84,14 +84,15 @@
   /// all registers that were disabled are removed from the list.
   SmallVector<MCPhysReg, 16> UpdatedCSRs;
 
-  /// RegAllocHints - This vector records register allocation hints for virtual
-  /// registers. For each virtual register, it keeps a register and hint type
-  /// pair making up the allocation hint. Hint type is target specific except
-  /// for the value 0 which means the second value of the pair is the preferred
-  /// register for allocation. For example, if the hint is <0, 1024>, it means
-  /// the allocator should prefer the physical register allocated to the virtual
-  /// register of the hint.
-  IndexedMap<std::pair<unsigned, unsigned>, VirtReg2IndexFunctor> RegAllocHints;
+  /// RegAllocHints - This vector records register allocation hints for
+  /// virtual registers. For each virtual register, it keeps a pair of a hint
+  /// type and a vector of hints, which together make up the allocation
+  /// hints. Only the first hint may be target specific, and in that case the
+  /// first member of the pair is non-zero. If a hinted register is virtual,
+  /// the allocator should prefer the physical register allocated to it, if
+  /// any.
+  IndexedMap<std::pair<unsigned, SmallVector<unsigned, 4>>,
+             VirtReg2IndexFunctor> RegAllocHints;
 
   /// PhysRegUseDefLists - This is an array of the head of the use/def list for
   /// physical registers.
@@ -702,35 +703,55 @@
   void clearVirtRegs();
 
   /// setRegAllocationHint - Specify a register allocation hint for the
-  /// specified virtual register.
+  /// specified virtual register. This is typically used by a target, and an
+  /// earlier hint will be overwritten.
   void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) {
     assert(TargetRegisterInfo::isVirtualRegister(VReg));
     RegAllocHints[VReg].first  = Type;
-    RegAllocHints[VReg].second = PrefReg;
+    RegAllocHints[VReg].second.clear();
+    RegAllocHints[VReg].second.push_back(PrefReg);
   }
 
-  /// Specify the preferred register allocation hint for the specified virtual
-  /// register.
+  /// addRegAllocationHint - Add a register allocation hint to the hints
+  /// vector for VReg.
+  void addRegAllocationHint(unsigned VReg, unsigned PrefReg) {
+    assert(TargetRegisterInfo::isVirtualRegister(VReg));
+    RegAllocHints[VReg].second.push_back(PrefReg);
+  }
+
+  /// Specify the preferred (target independent) register allocation hint for
+  /// the specified virtual register.
   void setSimpleHint(unsigned VReg, unsigned PrefReg) {
     setRegAllocationHint(VReg, /*Type=*/0, PrefReg);
  }
 
   /// getRegAllocationHint - Return the register allocation hint for the
-  /// specified virtual register.
+  /// specified virtual register. If there are several hints, this returns
+  /// the one with the greatest weight.
   std::pair<unsigned, unsigned> getRegAllocationHint(unsigned VReg) const {
     assert(TargetRegisterInfo::isVirtualRegister(VReg));
-    return RegAllocHints[VReg];
+    unsigned BestHint = (RegAllocHints[VReg].second.size() ?
+                         RegAllocHints[VReg].second[0] : 0);
+    return std::pair<unsigned, unsigned>(RegAllocHints[VReg].first, BestHint);
   }
 
-  /// getSimpleHint - Return the preferred register allocation hint, or 0 if a
-  /// standard simple hint (Type == 0) is not set.
+  /// getSimpleHint - Same as getRegAllocationHint, except it will only
+  /// return a target-independent hint.
   unsigned getSimpleHint(unsigned VReg) const {
     assert(TargetRegisterInfo::isVirtualRegister(VReg));
     std::pair<unsigned, unsigned> Hint = getRegAllocationHint(VReg);
     return Hint.first ? 0 : Hint.second;
   }
 
+  /// getRegAllocationHints - Return a reference to the vector of all
+  /// register allocation hints for VReg.
+  const std::pair<unsigned, SmallVector<unsigned, 4>>
+  &getRegAllocationHints(unsigned VReg) const {
+    assert(TargetRegisterInfo::isVirtualRegister(VReg));
+    return RegAllocHints[VReg];
+  }
+
   /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
   /// specified register as undefined which causes the DBG_VALUE to be
   /// deleted during LiveDebugVariables analysis.
Index: include/llvm/Target/TargetRegisterInfo.h
===================================================================
--- include/llvm/Target/TargetRegisterInfo.h
+++ include/llvm/Target/TargetRegisterInfo.h
@@ -784,11 +784,10 @@
   /// as returned from RegisterClassInfo::getOrder(). The hint registers must
   /// come from Order, and they must not be reserved.
   ///
-  /// The default implementation of this function can resolve
-  /// target-independent hints provided to MRI::setRegAllocationHint with
-  /// HintType == 0. Targets that override this function should defer to the
-  /// default implementation if they have no reason to change the allocation
-  /// order for VirtReg. There may be target-independent hints.
+  /// The default implementation of this function will only add target
+  /// independent register allocation hints. Targets that override this
+  /// function should typically call this default implementation as well and
+  /// expect to see generic copy hints added.
   virtual void getRegAllocationHints(unsigned VirtReg,
                                      ArrayRef<MCPhysReg> Order,
                                      SmallVectorImpl<MCPhysReg> &Hints,
Index: lib/CodeGen/CalcSpillWeights.cpp
===================================================================
--- lib/CodeGen/CalcSpillWeights.cpp
+++ lib/CodeGen/CalcSpillWeights.cpp
@@ -69,14 +69,16 @@
   if (TargetRegisterInfo::isVirtualRegister(hreg))
     return sub == hsub ? hreg : 0;
 
+  unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
   const TargetRegisterClass *rc = mri.getRegClass(reg);
+  if (rc->contains(CopiedPReg))
+    return CopiedPReg;
 
-  // Only allow physreg hints in rc.
-  if (sub == 0)
-    return rc->contains(hreg) ? hreg : 0;
+  // Check if reg:sub matches so that a super register could be hinted.
+  if (sub)
+    return tri.getMatchingSuperReg(CopiedPReg, sub, rc);
 
-  // reg:sub should match the physreg hreg.
-  return tri.getMatchingSuperReg(hreg, sub, rc);
+  return 0;
 }
 
 // Check if all values in LI are rematerializable
@@ -144,16 +146,27 @@
   unsigned numInstr = 0; // Number of instructions using li
   SmallPtrSet<MachineInstr*, 8> visited;
 
-  // Find the best physreg hint and the best virtreg hint.
-  float bestPhys = 0, bestVirt = 0;
-  unsigned hintPhys = 0, hintVirt = 0;
-
-  // Don't recompute a target specific hint.
-  bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
-
   // Don't recompute spill weight for an unspillable register.
   bool Spillable = li.isSpillable();
 
+  // CopyHint is a sortable hint derived from a COPY instruction.
+  struct CopyHint {
+    unsigned Reg;
+    float Weight;
+    bool IsPhys;
+    CopyHint(unsigned R, float W, bool P) : Reg(R), Weight(W), IsPhys(P) {}
+    bool operator<(const CopyHint &rhs) const {
+      // Always prefer any physreg hint.
+      if (IsPhys != rhs.IsPhys)
+        return (IsPhys && !rhs.IsPhys);
+      if (Weight != rhs.Weight)
+        return (Weight > rhs.Weight);
+      // (just for the purpose of maintaining the set)
+      return Reg < rhs.Reg;
+    }
+  };
+
+  std::set<CopyHint> CopyHints;
 
   for (MachineRegisterInfo::reg_instr_iterator
        I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
       I != E; ) {
@@ -186,7 +199,7 @@
     }
 
     // Get allocation hints from copies.
-    if (noHint || !mi->isCopy())
+    if (!mi->isCopy())
      continue;
    unsigned hint = copyHint(mi, li.reg, tri, mri);
     if (!hint)
@@ -196,27 +209,23 @@
     //
     // FIXME: we probably shouldn't use floats at all.
     volatile float hweight = Hint[hint] += weight;
-    if (TargetRegisterInfo::isPhysicalRegister(hint)) {
-      if (hweight > bestPhys && mri.isAllocatable(hint)) {
-        bestPhys = hweight;
-        hintPhys = hint;
-      }
-    } else {
-      if (hweight > bestVirt) {
-        bestVirt = hweight;
-        hintVirt = hint;
-      }
-    }
+    CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint)));
   }
 
   Hint.clear();
-  // Always prefer the physreg hint.
-  if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
-    mri.setRegAllocationHint(li.reg, 0, hint);
+
+  // Pass all the sorted copy hints to mri.
+  std::pair<unsigned, unsigned> TargetHint = mri.getRegAllocationHint(li.reg);
+  for (auto &Hint : CopyHints) {
+    if (Hint.Reg == TargetHint.second)
+      // Don't add the target hint again.
+      continue;
+    mri.addRegAllocationHint(li.reg, Hint.Reg);
+  }
+
+  if (CopyHints.size())
     // Weakly boost the spill weight of hinted registers.
     totalWeight *= 1.01F;
-  }
 
   // If the live interval was already unspillable, leave it that way.
   if (!Spillable)
Index: lib/CodeGen/TargetRegisterInfo.cpp
===================================================================
--- lib/CodeGen/TargetRegisterInfo.cpp
+++ lib/CodeGen/TargetRegisterInfo.cpp
@@ -368,31 +368,36 @@
                                                const VirtRegMap *VRM,
                                                const LiveRegMatrix *Matrix) const {
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
-
-  // Hints with HintType != 0 were set by target-dependent code.
-  // Such targets must provide their own implementation of
-  // TRI::getRegAllocationHints to interpret those hint types.
-  assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
-
-  // Target-independent hints are either a physical or a virtual register.
-  unsigned Phys = Hint.second;
-  if (VRM && isVirtualRegister(Phys))
-    Phys = VRM->getPhys(Phys);
-
-  // Check that Phys is a valid hint in VirtReg's register class.
-  if (!isPhysicalRegister(Phys))
-    return;
-  if (MRI.isReserved(Phys))
-    return;
-  // Check that Phys is in the allocation order. We shouldn't heed hints
-  // from VirtReg's register class if they aren't in the allocation order. The
-  // target probably has a reason for removing the register.
-  if (!is_contained(Order, Phys))
-    return;
-
-  // All clear, tell the register allocator to prefer this register.
-  Hints.push_back(Phys);
+  const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI =
+    MRI.getRegAllocationHints(VirtReg);
+
+  // First hint may be a target hint.
+  bool Skip = (Hints_MRI.first != 0);
+  for (auto Reg : Hints_MRI.second) {
+    if (Skip) {
+      Skip = false;
+      continue;
+    }
+
+    // Target-independent hints are either a physical or a virtual register.
+    unsigned Phys = Reg;
+    if (VRM && isVirtualRegister(Phys))
+      Phys = VRM->getPhys(Phys);
+
+    // Check that Phys is a valid hint in VirtReg's register class.
+    if (!isPhysicalRegister(Phys))
+      continue;
+    if (MRI.isReserved(Phys))
+      continue;
+    // Check that Phys is in the allocation order.
We shouldn't heed hints + // from VirtReg's register class if they aren't in the allocation order. The + // target probably has a reason for removing the register. + if (!is_contained(Order, Phys)) + continue; + + // All clear, tell the register allocator to prefer this register. + Hints.push_back(Phys); + } } bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { Index: test/CodeGen/AArch64/arm64-aapcs.ll =================================================================== --- test/CodeGen/AArch64/arm64-aapcs.ll +++ test/CodeGen/AArch64/arm64-aapcs.ll @@ -5,20 +5,20 @@ ; CHECK-LABEL: @test_i128_align define i128 @test_i128_align(i32, i128 %arg, i32 %after) { store i32 %after, i32* @var, align 4 -; CHECK: str w4, [{{x[0-9]+}}, :lo12:var] +; CHECK-DAG: str w4, [{{x[0-9]+}}, :lo12:var] ret i128 %arg -; CHECK: mov x0, x2 -; CHECK: mov x1, x3 +; CHECK-DAG: mov x0, x2 +; CHECK-DAG: mov x1, x3 } ; CHECK-LABEL: @test_i64x2_align define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) { store i32 %after, i32* @var, align 4 -; CHECK: str w3, [{{x[0-9]+}}, :lo12:var] +; CHECK-DAG: str w3, [{{x[0-9]+}}, :lo12:var] ret [2 x i64] %arg -; CHECK: mov x0, x1 +; CHECK-DAG: mov x0, x1 ; CHECK: mov x1, x2 } Index: test/CodeGen/AArch64/func-argpassing.ll =================================================================== --- test/CodeGen/AArch64/func-argpassing.ll +++ test/CodeGen/AArch64/func-argpassing.ll @@ -164,11 +164,11 @@ define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) { ; CHECK-LABEL: check_i128_regalign store i128 %val1, i128* @var128 -; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 +; CHECK-DAG: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 ; CHECK-DAG: stp x2, x3, [x[[VAR128]]] ret i64 %val2 -; CHECK: mov x0, x4 +; CHECK-DAG: mov x0, x4 } define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, Index: test/CodeGen/AArch64/swifterror.ll =================================================================== --- test/CodeGen/AArch64/swifterror.ll +++ test/CodeGen/AArch64/swifterror.ll @@ -40,11 +40,11 @@ ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE: mov x0, x21 +; CHECK-APPLE: cbnz x0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller: @@ -263,11 +263,11 @@ ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo_sret -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE: mov x0, x21 +; CHECK-APPLE: cbnz x0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller3: @@ -358,11 +358,11 @@ ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo_vararg -; CHECK-APPLE: cbnz x21 +; CHECK-APPLE: mov x0, x21 +; CHECK-APPLE: cbnz x0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free entry: %error_ptr_ref = alloca swifterror %swift_error* Index: 
test/CodeGen/AArch64/win64_vararg.ll =================================================================== --- test/CodeGen/AArch64/win64_vararg.ll +++ test/CodeGen/AArch64/win64_vararg.ll @@ -161,25 +161,25 @@ ; CHECK: add x8, x8, #15 ; CHECK: mov x9, sp ; CHECK: and x8, x8, #0x1fffffff0 -; CHECK: sub x20, x9, x8 +; CHECK: sub [[REG:x[0-9]+]], x9, x8 ; CHECK: mov x19, x1 -; CHECK: mov x23, sp +; CHECK: mov [[REG2:x[0-9]+]], sp ; CHECK: stp x6, x7, [x29, #48] ; CHECK: stp x4, x5, [x29, #32] ; CHECK: stp x2, x3, [x29, #16] -; CHECK: mov sp, x20 -; CHECK: ldur x21, [x29, #-40] -; CHECK: sxtw x22, w0 +; CHECK: mov sp, [[REG]] +; CHECK: ldur [[REG3:x[0-9]+]], [x29, #-40] +; CHECK: sxtw [[REG4:x[0-9]+]], w0 ; CHECK: bl __local_stdio_printf_options ; CHECK: ldr x8, [x0] -; CHECK: mov x1, x20 -; CHECK: mov x2, x22 +; CHECK: mov x1, [[REG]] +; CHECK: mov x2, [[REG4]] ; CHECK: mov x3, x19 ; CHECK: orr x0, x8, #0x2 ; CHECK: mov x4, xzr -; CHECK: mov x5, x21 +; CHECK: mov x5, [[REG3]] ; CHECK: bl __stdio_common_vsprintf -; CHECK: mov sp, x23 +; CHECK: mov sp, [[REG2]] ; CHECK: sub sp, x29, #48 ; CHECK: ldp x29, x30, [sp, #48] ; CHECK: ldp x20, x19, [sp, #32] @@ -255,17 +255,15 @@ ; CHECK-LABEL: fixed_params ; CHECK: sub sp, sp, #32 -; CHECK: mov w8, w3 -; CHECK: mov w9, w2 -; CHECK: mov w10, w1 +; CHECK-DAG: mov w6, w3 +; CHECK-DAG: mov [[REG1:w[0-9]+]], w2 +; CHECK: mov w2, w1 ; CHECK: str w4, [sp] ; CHECK: fmov x1, d0 ; CHECK: fmov x3, d1 ; CHECK: fmov x5, d2 ; CHECK: fmov x7, d3 -; CHECK: mov w2, w10 -; CHECK: mov w4, w9 -; CHECK: mov w6, w8 +; CHECK: mov w4, [[REG1]] ; CHECK: str x30, [sp, #16] ; CHECK: str d4, [sp, #8] ; CHECK: bl varargs Index: test/CodeGen/AMDGPU/callee-special-input-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -208,8 +208,8 @@ ; GCN: enable_sgpr_workgroup_id_z = 0 ; GCN: s_mov_b32 s33, s8 -; GCN: s_mov_b32 s4, s33 -; GCN: s_mov_b32 s6, s7 +; GCN-DAG: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_mov_b32 s32, s33 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 { @@ -223,8 +223,8 @@ ; GCN: enable_sgpr_workgroup_id_z = 1 ; GCN: s_mov_b32 s33, s8 -; GCN: s_mov_b32 s4, s33 -; GCN: s_mov_b32 s6, s7 +; GCN-DAG: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 { call void @use_workgroup_id_z() @@ -396,7 +396,7 @@ ; GCN-DAG: s_mov_b32 s33, s8 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s6, s7 ; GCN-DAG: s_mov_b32 s32, s33 ; GCN: s_swappc_b64 @@ -412,7 +412,7 @@ ; GCN: s_mov_b32 s33, s8 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_mov_b32 s32, s33 Index: test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -220,8 +220,8 @@ ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z: ; GCN: enable_vgpr_workitem_id = 2 -; GCN: v_mov_b32_e32 v0, 0x22b -; GCN: v_mov_b32_e32 v1, v2 +; GCN-DAG: v_mov_b32_e32 v0, 0x22b +; GCN-DAG: v_mov_b32_e32 v1, v2 ; GCN: s_swappc_b64 ; GCN-NOT: v0 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { Index: test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll 
=================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll @@ -41,7 +41,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_oeq: -; GCN: v_cmp_eq_f32_e64 +; GCN: v_cmp_eq_f32_e32 define amdgpu_kernel void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1) store i64 %result, i64 addrspace(1)* %out @@ -49,7 +49,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_one: -; GCN: v_cmp_neq_f32_e64 +; GCN: v_cmp_neq_f32_e32 define amdgpu_kernel void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6) store i64 %result, i64 addrspace(1)* %out @@ -57,7 +57,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ogt: -; GCN: v_cmp_gt_f32_e64 +; GCN: v_cmp_gt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2) store i64 %result, i64 addrspace(1)* %out @@ -65,7 +65,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_oge: -; GCN: v_cmp_ge_f32_e64 +; GCN: v_cmp_ge_f32_e32 define amdgpu_kernel void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3) store i64 %result, i64 addrspace(1)* %out @@ -73,7 +73,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_olt: -; GCN: v_cmp_lt_f32_e64 +; GCN: v_cmp_lt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4) store i64 %result, i64 addrspace(1)* %out @@ -81,7 +81,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ole: -; GCN: v_cmp_le_f32_e64 +; GCN: v_cmp_le_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5) store i64 %result, i64 addrspace(1)* %out @@ -90,7 +90,7 @@ ; GCN-LABEL: {{^}}v_fcmp_f32_ueq: -; GCN: v_cmp_nlg_f32_e64 +; GCN: v_cmp_nlg_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9) store i64 %result, i64 addrspace(1)* %out @@ -98,7 +98,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_une: -; GCN: v_cmp_neq_f32_e64 +; GCN: v_cmp_neq_f32_e32 define amdgpu_kernel void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14) store i64 %result, i64 addrspace(1)* %out @@ -106,7 +106,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ugt: -; GCN: v_cmp_nle_f32_e64 +; GCN: v_cmp_nle_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10) store i64 %result, i64 addrspace(1)* %out @@ -114,7 +114,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_uge: -; GCN: v_cmp_nlt_f32_e64 +; GCN: v_cmp_nlt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11) store i64 %result, i64 addrspace(1)* %out @@ -122,7 +122,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ult: -; GCN: v_cmp_nge_f32_e64 +; GCN: v_cmp_nge_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12) store i64 %result, i64 addrspace(1)* %out @@ -130,7 +130,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f32_ule: -; GCN: 
v_cmp_ngt_f32_e64 +; GCN: v_cmp_ngt_f32_e32 define amdgpu_kernel void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) { %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13) store i64 %result, i64 addrspace(1)* %out @@ -138,7 +138,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_oeq: -; GCN: v_cmp_eq_f64_e64 +; GCN: v_cmp_eq_f64_e32 define amdgpu_kernel void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1) store i64 %result, i64 addrspace(1)* %out @@ -146,7 +146,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_one: -; GCN: v_cmp_neq_f64_e64 +; GCN: v_cmp_neq_f64_e32 define amdgpu_kernel void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6) store i64 %result, i64 addrspace(1)* %out @@ -154,7 +154,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ogt: -; GCN: v_cmp_gt_f64_e64 +; GCN: v_cmp_gt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2) store i64 %result, i64 addrspace(1)* %out @@ -162,7 +162,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_oge: -; GCN: v_cmp_ge_f64_e64 +; GCN: v_cmp_ge_f64_e32 define amdgpu_kernel void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3) store i64 %result, i64 addrspace(1)* %out @@ -170,7 +170,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_olt: -; GCN: v_cmp_lt_f64_e64 +; GCN: v_cmp_lt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4) store i64 %result, i64 addrspace(1)* %out @@ -178,7 +178,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ole: -; GCN: v_cmp_le_f64_e64 +; GCN: v_cmp_le_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5) store i64 %result, i64 addrspace(1)* %out @@ -186,7 +186,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ueq: -; GCN: v_cmp_nlg_f64_e64 +; GCN: v_cmp_nlg_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9) store i64 %result, i64 addrspace(1)* %out @@ -194,7 +194,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_une: -; GCN: v_cmp_neq_f64_e64 +; GCN: v_cmp_neq_f64_e32 define amdgpu_kernel void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14) store i64 %result, i64 addrspace(1)* %out @@ -202,7 +202,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ugt: -; GCN: v_cmp_nle_f64_e64 +; GCN: v_cmp_nle_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10) store i64 %result, i64 addrspace(1)* %out @@ -210,7 +210,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_uge: -; GCN: v_cmp_nlt_f64_e64 +; GCN: v_cmp_nlt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11) store i64 %result, i64 addrspace(1)* %out @@ -218,7 +218,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ult: -; GCN: v_cmp_nge_f64_e64 +; GCN: v_cmp_nge_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 
100.00, i32 12) store i64 %result, i64 addrspace(1)* %out @@ -226,7 +226,7 @@ } ; GCN-LABEL: {{^}}v_fcmp_f64_ule: -; GCN: v_cmp_ngt_f64_e64 +; GCN: v_cmp_ngt_f64_e32 define amdgpu_kernel void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) { %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13) store i64 %result, i64 addrspace(1)* %out Index: test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll @@ -14,7 +14,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_eq: -; GCN: v_cmp_eq_u32_e64 +; GCN: v_cmp_eq_u32_e32 define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32) store i64 %result, i64 addrspace(1)* %out @@ -29,7 +29,7 @@ ret void } ; GCN-LABEL: {{^}}v_icmp_i32_ne: -; GCN: v_cmp_ne_u32_e64 +; GCN: v_cmp_ne_u32_e32 define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33) store i64 %result, i64 addrspace(1)* %out @@ -37,7 +37,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_ugt: -; GCN: v_cmp_gt_u32_e64 +; GCN: v_cmp_gt_u32_e32 define amdgpu_kernel void @v_icmp_u32_ugt(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34) store i64 %result, i64 addrspace(1)* %out @@ -45,7 +45,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_uge: -; GCN: v_cmp_ge_u32_e64 +; GCN: v_cmp_ge_u32_e32 define amdgpu_kernel void @v_icmp_u32_uge(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35) store i64 %result, i64 addrspace(1)* %out @@ -53,7 +53,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_ult: -; GCN: v_cmp_lt_u32_e64 +; GCN: v_cmp_lt_u32_e32 define amdgpu_kernel void @v_icmp_u32_ult(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36) store i64 %result, i64 addrspace(1)* %out @@ -61,7 +61,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u32_ule: -; GCN: v_cmp_le_u32_e64 +; GCN: v_cmp_le_u32_e32 define amdgpu_kernel void @v_icmp_u32_ule(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37) store i64 %result, i64 addrspace(1)* %out @@ -69,7 +69,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_sgt: -; GCN: v_cmp_gt_i32_e64 +; GCN: v_cmp_gt_i32_e32 define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38) store i64 %result, i64 addrspace(1)* %out @@ -77,7 +77,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_sge: -; GCN: v_cmp_ge_i32_e64 +; GCN: v_cmp_ge_i32_e32 define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39) store i64 %result, i64 addrspace(1)* %out @@ -85,14 +85,14 @@ } ; GCN-LABEL: {{^}}v_icmp_i32_slt: -; GCN: v_cmp_lt_i32_e64 +; GCN: v_cmp_lt_i32_e32 define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40) store i64 %result, i64 addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}v_icmp_i32_sle: -; GCN: v_cmp_le_i32_e64 +; GCN: v_cmp_le_i32_e32 define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41) store i64 %result, i64 addrspace(1)* %out @@ -100,7 +100,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_eq: -; GCN: 
v_cmp_eq_u64_e64 +; GCN: v_cmp_eq_u64_e32 define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32) store i64 %result, i64 addrspace(1)* %out @@ -108,7 +108,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_ne: -; GCN: v_cmp_ne_u64_e64 +; GCN: v_cmp_ne_u64_e32 define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33) store i64 %result, i64 addrspace(1)* %out @@ -116,7 +116,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_ugt: -; GCN: v_cmp_gt_u64_e64 +; GCN: v_cmp_gt_u64_e32 define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34) store i64 %result, i64 addrspace(1)* %out @@ -124,7 +124,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_uge: -; GCN: v_cmp_ge_u64_e64 +; GCN: v_cmp_ge_u64_e32 define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35) store i64 %result, i64 addrspace(1)* %out @@ -132,7 +132,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_ult: -; GCN: v_cmp_lt_u64_e64 +; GCN: v_cmp_lt_u64_e32 define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36) store i64 %result, i64 addrspace(1)* %out @@ -140,7 +140,7 @@ } ; GCN-LABEL: {{^}}v_icmp_u64_ule: -; GCN: v_cmp_le_u64_e64 +; GCN: v_cmp_le_u64_e32 define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37) store i64 %result, i64 addrspace(1)* %out @@ -148,7 +148,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_sgt: -; GCN: v_cmp_gt_i64_e64 +; GCN: v_cmp_gt_i64_e32 define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38) store i64 %result, i64 addrspace(1)* %out @@ -156,7 +156,7 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_sge: -; GCN: v_cmp_ge_i64_e64 +; GCN: v_cmp_ge_i64_e32 define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39) store i64 %result, i64 addrspace(1)* %out @@ -164,14 +164,14 @@ } ; GCN-LABEL: {{^}}v_icmp_i64_slt: -; GCN: v_cmp_lt_i64_e64 +; GCN: v_cmp_lt_i64_e32 define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40) store i64 %result, i64 addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}v_icmp_i64_sle: -; GCN: v_cmp_le_i64_e64 +; GCN: v_cmp_le_i64_e32 define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) { %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41) store i64 %result, i64 addrspace(1)* %out Index: test/CodeGen/AMDGPU/ret.ll =================================================================== --- test/CodeGen/AMDGPU/ret.ll +++ test/CodeGen/AMDGPU/ret.ll @@ -126,9 +126,9 @@ ; GCN-LABEL: {{^}}vgpr_ps_addr119: ; GCN-DAG: v_mov_b32_e32 v0, v2 ; GCN-DAG: v_mov_b32_e32 v1, v3 -; GCN: v_mov_b32_e32 v2, v6 -; GCN: v_mov_b32_e32 v3, v8 -; GCN: v_mov_b32_e32 v4, v12 +; GCN-DAG: v_mov_b32_e32 v2, v6 +; GCN-DAG: v_mov_b32_e32 v3, v8 +; GCN-DAG: v_mov_b32_e32 v4, v12 ; GCN-NOT: s_endpgm define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, 
<3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 { bb: @@ -178,8 +178,8 @@ } ; GCN-LABEL: {{^}}sgpr: -; GCN: s_add_i32 s0, s3, 2 ; GCN: s_mov_b32 s2, s3 +; GCN: s_add_i32 s0, s2, 2 ; GCN-NOT: s_endpgm define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: Index: test/CodeGen/AMDGPU/sgpr-control-flow.ll =================================================================== --- test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -77,8 +77,8 @@ ; SI-LABEL: {{^}}sgpr_if_else_valu_br: ; SI: s_add_i32 [[SGPR:s[0-9]+]] -; SI-NOT: s_add_i32 [[SGPR]] - +; SI: s_add_i32 [[SGPR]] +; NOTE: this is currently failing as the last check should actually be -NOT. define amdgpu_kernel void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 Index: test/CodeGen/SystemZ/call-03.ll =================================================================== --- test/CodeGen/SystemZ/call-03.ll +++ test/CodeGen/SystemZ/call-03.ll @@ -62,16 +62,13 @@ ; Check an indirect call. In this case the only acceptable choice for ; the target register is %r1. -; -; NOTE: the extra copy 'lgr %r1, %r0' is a coalescing failure. define void @f5(void(i32, i32, i32, i32) *%foo) { ; CHECK-LABEL: f5: -; CHECK: lgr %r0, %r2 +; CHECK: lgr %r1, %r2 ; CHECK-DAG: lhi %r2, 1 ; CHECK-DAG: lhi %r3, 2 ; CHECK-DAG: lhi %r4, 3 ; CHECK-DAG: lhi %r5, 4 -; CHECK: lgr %r1, %r0 ; CHECK: br %r1 tail call void %foo(i32 1, i32 2, i32 3, i32 4) ret void Index: test/CodeGen/SystemZ/swift-return.ll =================================================================== --- test/CodeGen/SystemZ/swift-return.ll +++ test/CodeGen/SystemZ/swift-return.ll @@ -39,9 +39,8 @@ ; in memroy. The caller provides space for the return value and passes ; the address in %r2. The first input argument will be in %r3. 
; CHECK-LABEL: test2: -; CHECK: lr %[[REG1:r[0-9]+]], %r2 +; CHECK: lr %r3, %r2 ; CHECK-DAG: la %r2, 160(%r15) -; CHECK-DAG: lr %r3, %[[REG1]] ; CHECK: brasl %r14, gen2 ; CHECK: l %r2, 160(%r15) ; CHECK: a %r2, 164(%r15) Index: test/CodeGen/SystemZ/swifterror.ll =================================================================== --- test/CodeGen/SystemZ/swifterror.ll +++ test/CodeGen/SystemZ/swifterror.ll @@ -34,11 +34,11 @@ ; CHECK: lgr %r[[REG1:[0-9]+]], %r2 ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo -; CHECK: cgijlh %r9, 0, +; CHECK: %r2, %r9 +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r[[REG2:[0-9]+]], 8(%r9) +; CHECK: lb %r[[REG2:[0-9]+]], 8(%r2) ; CHECK: stc %r[[REG2]], 0(%r[[REG1]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; CHECK-O0-LABEL: caller: ; CHECK-O0: lghi %r9, 0 @@ -246,11 +246,10 @@ ; CHECK: lhi %r3, 1 ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo_sret -; CHECK: cgijlh %r9, 0, +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r0, 8(%r9) +; CHECK: lb %r0, 8(%r2) ; CHECK: stc %r0, 0(%r[[REG1]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; CHECK-O0-LABEL: caller3: @@ -296,21 +295,21 @@ ; The first swifterror value: ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo -; CHECK: cgijlh %r9, 0, +; CHECK: ltgr %r2, %r9 +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r0, 8(%r9) +; CHECK: lb %r0, 8(%r2) ; CHECK: stc %r0, 0(%r[[REG1]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; The second swifterror value: ; CHECK: lghi %r9, 0 ; CHECK: brasl %r14, foo -; CHECK: cgijlh %r9, 0, +; CHECK: ltgr %r2, %r9 +; CHECK: jlh ; Access part of the error object and save it to error_ref -; CHECK: lb %r0, 8(%r9) +; CHECK: lb %r0, 8(%r2) ; CHECK: stc %r0, 0(%r[[REG2]]) -; CHECK: lgr %r2, %r9 ; CHECK: brasl %r14, free ; CHECK-O0-LABEL: caller_with_multiple_swifterror_values:
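
Editor's note, not part of the patch: a minimal sketch of how a target override is expected to consume the new multi-hint interface, given the updated comment in TargetRegisterInfo.h that overrides "should typically call this default implementation as well". The class name MyTargetRegisterInfo and the hint type value 1 are hypothetical; the MRI/VRM/is_contained calls are the existing LLVM APIs already used in this patch.

  // Hypothetical target override (sketch only).
  void MyTargetRegisterInfo::getRegAllocationHints(
      unsigned VirtReg, ArrayRef<MCPhysReg> Order,
      SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
      const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);

    // Interpret the first hint only if it carries this target's hint type;
    // the default implementation below deliberately skips it in that case.
    if (Hint.first == /*MyHintType=*/1) {
      unsigned Phys = Hint.second;
      if (VRM && TargetRegisterInfo::isVirtualRegister(Phys))
        Phys = VRM->getPhys(Phys);
      if (TargetRegisterInfo::isPhysicalRegister(Phys) &&
          !MRI.isReserved(Phys) && is_contained(Order, Phys))
        Hints.push_back(Phys);
    }

    // Append the target-independent copy hints that CalcSpillWeights
    // registered via addRegAllocationHint(), filtered against Order.
    TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM,
                                              Matrix);
  }

With this patch a virtual register can carry several hints, so Hints would typically end up ordered as: the target's own preference first, followed by the generic copy hints sorted by descending copy weight with physical registers before virtual ones, as produced by the CopyHint set in CalcSpillWeights.cpp.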