Index: include/llvm/CodeGen/MachineRegisterInfo.h
===================================================================
--- include/llvm/CodeGen/MachineRegisterInfo.h
+++ include/llvm/CodeGen/MachineRegisterInfo.h
@@ -84,14 +84,15 @@
   /// all registers that were disabled are removed from the list.
   SmallVector<MCPhysReg, 16> UpdatedCSRs;

-  /// RegAllocHints - This vector records register allocation hints for virtual
-  /// registers. For each virtual register, it keeps a register and hint type
-  /// pair making up the allocation hint. Hint type is target specific except
-  /// for the value 0 which means the second value of the pair is the preferred
-  /// register for allocation. For example, if the hint is <0, 1024>, it means
-  /// the allocator should prefer the physical register allocated to the virtual
-  /// register of the hint.
-  IndexedMap<std::pair<unsigned, unsigned>, VirtReg2IndexFunctor> RegAllocHints;
+  /// RegAllocHints - This vector records register allocation hints for
+  /// virtual registers. For each virtual register, it keeps a pair of hint
+  /// type and a hints vector, which together make up the allocation hints.
+  /// Only the first hint may be target specific, in which case the first
+  /// member of the pair is non-zero. If a hinted register is virtual, the
+  /// allocator should prefer the physical register allocated to it, if any.
+  IndexedMap<std::pair<unsigned, SmallVector<unsigned, 4>>,
+             VirtReg2IndexFunctor> RegAllocHints;

   /// PhysRegUseDefLists - This is an array of the head of the use/def list for
   /// physical registers.
@@ -702,35 +703,61 @@
   void clearVirtRegs();

   /// setRegAllocationHint - Specify a register allocation hint for the
-  /// specified virtual register.
+  /// specified virtual register. This is typically used by targets, and any
+  /// earlier hint is overwritten.
   void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) {
     assert(TargetRegisterInfo::isVirtualRegister(VReg));
     RegAllocHints[VReg].first = Type;
-    RegAllocHints[VReg].second = PrefReg;
+    RegAllocHints[VReg].second.clear();
+    RegAllocHints[VReg].second.push_back(PrefReg);
   }

-  /// Specify the preferred register allocation hint for the specified virtual
-  /// register.
+  /// addRegAllocationHint - Add a register allocation hint to the hints
+  /// vector for VReg.
+  void addRegAllocationHint(unsigned VReg, unsigned PrefReg) {
+    assert(TargetRegisterInfo::isVirtualRegister(VReg));
+    RegAllocHints[VReg].second.push_back(PrefReg);
+  }
+
+  /// Specify the preferred (target independent) register allocation hint for
+  /// the specified virtual register.
   void setSimpleHint(unsigned VReg, unsigned PrefReg) {
     setRegAllocationHint(VReg, /*Type=*/0, PrefReg);
   }

+  /// Clear any previous register allocation hints for VReg.
+  void clearRegAllocationHints(unsigned VReg) {
+    RegAllocHints[VReg].first = 0;
+    RegAllocHints[VReg].second.clear();
+  }
+
   /// getRegAllocationHint - Return the register allocation hint for the
-  /// specified virtual register.
+  /// specified virtual register. If there are multiple hints, this returns
+  /// the one with the greatest weight.
   std::pair<unsigned, unsigned> getRegAllocationHint(unsigned VReg) const {
     assert(TargetRegisterInfo::isVirtualRegister(VReg));
-    return RegAllocHints[VReg];
+    unsigned BestHint = (RegAllocHints[VReg].second.size() ?
+                         RegAllocHints[VReg].second[0] : 0);
+    return std::pair<unsigned, unsigned>(RegAllocHints[VReg].first, BestHint);
   }

-  /// getSimpleHint - Return the preferred register allocation hint, or 0 if a
-  /// standard simple hint (Type == 0) is not set.
+  /// getSimpleHint - Same as getRegAllocationHint, except that it only
+  /// returns a target-independent hint.
   unsigned getSimpleHint(unsigned VReg) const {
     assert(TargetRegisterInfo::isVirtualRegister(VReg));
     std::pair<unsigned, unsigned> Hint = getRegAllocationHint(VReg);
     return Hint.first ? 0 : Hint.second;
   }

+  /// getRegAllocationHints - Return a reference to the pair of hint type and
+  /// the vector of all register allocation hints for VReg.
+  const std::pair<unsigned, SmallVector<unsigned, 4>>
+  &getRegAllocationHints(unsigned VReg) const {
+    assert(TargetRegisterInfo::isVirtualRegister(VReg));
+    return RegAllocHints[VReg];
+  }
+
   /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
   /// specified register as undefined which causes the DBG_VALUE to be
   /// deleted during LiveDebugVariables analysis.
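To make the semantics of the reworked hint interface concrete, here is a minimal sketch, not part of the patch, of how the accessors compose; the hint type value and the register arguments are invented for illustration:

```c++
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <cassert>
using namespace llvm;

// Record one target-specific hint followed by two generic copy hints.
static void recordHints(MachineRegisterInfo &MRI, unsigned VReg,
                        unsigned TargetPhys, unsigned CopyPhys,
                        unsigned CopyVirt) {
  // A hint with Type != 0 overwrites any earlier hints for VReg.
  MRI.setRegAllocationHint(VReg, /*Type=*/1, TargetPhys);
  // Generic copy hints are appended behind it, in the order they are added.
  MRI.addRegAllocationHint(VReg, CopyPhys);
  MRI.addRegAllocationHint(VReg, CopyVirt); // Virtual registers are allowed.

  // The single-hint query still works and returns the first (best) hint.
  std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VReg);
  assert(Hint.first == 1 && Hint.second == TargetPhys);
  // getSimpleHint() yields 0 because the first hint is target-specific.
  assert(MRI.getSimpleHint(VReg) == 0);
}
```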
Index: include/llvm/Target/TargetRegisterInfo.h
===================================================================
--- include/llvm/Target/TargetRegisterInfo.h
+++ include/llvm/Target/TargetRegisterInfo.h
@@ -784,11 +784,10 @@
   /// as returned from RegisterClassInfo::getOrder(). The hint registers must
   /// come from Order, and they must not be reserved.
   ///
-  /// The default implementation of this function can resolve
-  /// target-independent hints provided to MRI::setRegAllocationHint with
-  /// HintType == 0. Targets that override this function should defer to the
-  /// default implementation if they have no reason to change the allocation
-  /// order for VirtReg. There may be target-independent hints.
+  /// The default implementation of this function adds only target-independent
+  /// register allocation hints. Targets that override this function should
+  /// typically call this default implementation as well and expect to see
+  /// generic copy hints added.
   virtual void getRegAllocationHints(unsigned VirtReg,
                                      ArrayRef<MCPhysReg> Order,
                                      SmallVectorImpl<MCPhysReg> &Hints,
Index: lib/CodeGen/CalcSpillWeights.cpp
===================================================================
--- lib/CodeGen/CalcSpillWeights.cpp
+++ lib/CodeGen/CalcSpillWeights.cpp
@@ -69,14 +69,16 @@
   if (TargetRegisterInfo::isVirtualRegister(hreg))
     return sub == hsub ? hreg : 0;

+  unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
   const TargetRegisterClass *rc = mri.getRegClass(reg);
+  if (rc->contains(CopiedPReg))
+    return CopiedPReg;

-  // Only allow physreg hints in rc.
-  if (sub == 0)
-    return rc->contains(hreg) ? hreg : 0;
+  // Check if reg:sub matches so that a super register could be hinted.
+  if (sub)
+    return tri.getMatchingSuperReg(CopiedPReg, sub, rc);

-  // reg:sub should match the physreg hreg.
-  return tri.getMatchingSuperReg(hreg, sub, rc);
+  return 0;
 }

 // Check if all values in LI are rematerializable
@@ -144,16 +146,27 @@
   unsigned numInstr = 0; // Number of instructions using li
   SmallPtrSet<MachineInstr*, 8> visited;

-  // Find the best physreg hint and the best virtreg hint.
-  float bestPhys = 0, bestVirt = 0;
-  unsigned hintPhys = 0, hintVirt = 0;
-
-  // Don't recompute a target specific hint.
-  bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
-
   // Don't recompute spill weight for an unspillable register.
   bool Spillable = li.isSpillable();

+  // CopyHint is a sortable hint derived from a COPY instruction.
+  struct CopyHint {
+    unsigned Reg;
+    float Weight;
+    bool IsPhys;
+    CopyHint(unsigned R, float W, bool P) : Reg(R), Weight(W), IsPhys(P) {}
+    bool operator<(const CopyHint &rhs) const {
+      // Always prefer any physreg hint.
+      if (IsPhys != rhs.IsPhys)
+        return (IsPhys && !rhs.IsPhys);
+      if (Weight != rhs.Weight)
+        return (Weight > rhs.Weight);
+      // Tie-break on the register number, just to maintain set uniqueness.
+      return Reg < rhs.Reg;
+    }
+  };
+
+  std::set<CopyHint> CopyHints;
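Because std::set iterates in sorted order, the hints come out best-first: any physreg hint precedes all virtreg hints, heavier weights precede lighter ones, and the register number only breaks ties so that distinct hints all survive in the set. A standalone sketch demonstrating the ordering, using a simplified copy of the comparator above with invented weights and register numbers:

```c++
#include <cassert>
#include <set>

// Simplified stand-in for the CopyHint struct above.
struct Hint {
  unsigned Reg;
  float Weight;
  bool IsPhys;
  bool operator<(const Hint &RHS) const {
    if (IsPhys != RHS.IsPhys)
      return IsPhys && !RHS.IsPhys; // Physregs sort before virtregs.
    if (Weight != RHS.Weight)
      return Weight > RHS.Weight;   // Heavier weights sort first.
    return Reg < RHS.Reg;           // Tie-break for set uniqueness.
  }
};

int main() {
  std::set<Hint> Hints;
  Hints.insert({/*Reg=*/5000, /*Weight=*/2.0f, /*IsPhys=*/false});
  Hints.insert({/*Reg=*/3, /*Weight=*/1.0f, /*IsPhys=*/true});
  Hints.insert({/*Reg=*/7, /*Weight=*/4.0f, /*IsPhys=*/true});

  // Iteration order: physreg 7 (weight 4.0), physreg 3 (weight 1.0), then
  // the virtual register -- exactly the order later passed to MRI.
  auto It = Hints.begin();
  assert(It->Reg == 7);
  assert((++It)->Reg == 3);
  assert((++It)->Reg == 5000);
  return 0;
}
```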
   for (MachineRegisterInfo::reg_instr_iterator
        I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
        I != E; ) {
@@ -186,7 +199,7 @@
     }

     // Get allocation hints from copies.
-    if (noHint || !mi->isCopy())
+    if (!mi->isCopy())
       continue;
     unsigned hint = copyHint(mi, li.reg, tri, mri);
     if (!hint)
@@ -196,27 +209,28 @@
     //
     // FIXME: we probably shouldn't use floats at all.
     volatile float hweight = Hint[hint] += weight;
-    if (TargetRegisterInfo::isPhysicalRegister(hint)) {
-      if (hweight > bestPhys && mri.isAllocatable(hint)) {
-        bestPhys = hweight;
-        hintPhys = hint;
-      }
-    } else {
-      if (hweight > bestVirt) {
-        bestVirt = hweight;
-        hintVirt = hint;
-      }
-    }
+    CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint)));
   }

   Hint.clear();

-  // Always prefer the physreg hint.
-  if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
-    mri.setRegAllocationHint(li.reg, 0, hint);
+  std::pair<unsigned, unsigned> TargetHint = mri.getRegAllocationHint(li.reg);
+  if (TargetHint.first == 0 && TargetHint.second)
+    // Forget any previous generic hints, as they are now recomputed.
+    mri.clearRegAllocationHints(li.reg);
+
+  // Pass all the sorted copy hints to mri.
+  for (auto &Hint : CopyHints) {
+    if (TargetHint.first != 0 && Hint.Reg == TargetHint.second)
+      // Don't add a register that is already hinted with a target type. It
+      // will be added later, with a higher priority than these copy hints.
+      continue;
+    mri.addRegAllocationHint(li.reg, Hint.Reg);
+  }
+
+  if (CopyHints.size())
     // Weakly boost the spill weight of hinted registers.
     totalWeight *= 1.01F;
-  }

   // If the live interval was already unspillable, leave it that way.
   if (!Spillable)
Index: lib/CodeGen/TargetRegisterInfo.cpp
===================================================================
--- lib/CodeGen/TargetRegisterInfo.cpp
+++ lib/CodeGen/TargetRegisterInfo.cpp
@@ -368,31 +368,36 @@
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
-
-  // Hints with HintType != 0 were set by target-dependent code.
-  // Such targets must provide their own implementation of
-  // TRI::getRegAllocationHints to interpret those hint types.
-  assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
-
-  // Target-independent hints are either a physical or a virtual register.
-  unsigned Phys = Hint.second;
-  if (VRM && isVirtualRegister(Phys))
-    Phys = VRM->getPhys(Phys);
-
-  // Check that Phys is a valid hint in VirtReg's register class.
-  if (!isPhysicalRegister(Phys))
-    return;
-  if (MRI.isReserved(Phys))
-    return;
-  // Check that Phys is in the allocation order. We shouldn't heed hints
-  // from VirtReg's register class if they aren't in the allocation order. The
-  // target probably has a reason for removing the register.
-  if (!is_contained(Order, Phys))
-    return;
-
-  // All clear, tell the register allocator to prefer this register.
-  Hints.push_back(Phys);
+  const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI =
+    MRI.getRegAllocationHints(VirtReg);
+
+  // The first hint may be a target-specific hint.
+  bool Skip = (Hints_MRI.first != 0);
+  for (auto Reg : Hints_MRI.second) {
+    if (Skip) {
+      Skip = false;
+      continue;
+    }
+
+    // Target-independent hints are either a physical or a virtual register.
+    unsigned Phys = Reg;
+    if (VRM && isVirtualRegister(Phys))
+      Phys = VRM->getPhys(Phys);
+
+    // Check that Phys is a valid hint in VirtReg's register class.
+    if (!isPhysicalRegister(Phys))
+      continue;
+    if (MRI.isReserved(Phys))
+      continue;
+    // Check that Phys is in the allocation order. We shouldn't heed hints
+    // from VirtReg's register class if they aren't in the allocation order.
+    // The target probably has a reason for removing the register.
+    if (!is_contained(Order, Phys))
+      continue;
+
+    // All clear, tell the register allocator to prefer this register.
+    Hints.push_back(Phys);
+  }
 }
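Per the updated TargetRegisterInfo.h comment, a target override of getRegAllocationHints is now expected to call this default implementation for the generic copy hints and then resolve its own hint type on top. A hypothetical sketch of that pattern; MyTargetRegisterInfo, kPairHint, and resolvePairedReg are invented for illustration and not part of this patch:

```c++
void MyTargetRegisterInfo::getRegAllocationHints(
    unsigned VirtReg, ArrayRef<MCPhysReg> Order,
    SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
    const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);

  // Let the default implementation append the generic copy hints; it skips
  // the first hint in the MRI vector when Hint.first != 0.
  TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM,
                                            Matrix);

  // Then interpret the target-specific hint type and give the resolved
  // register top priority, ahead of the copy hints.
  if (Hint.first == kPairHint)
    if (MCPhysReg Paired = resolvePairedReg(Hint.second, MRI, VRM))
      Hints.insert(Hints.begin(), Paired);
}
```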

 bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
Index: test/CodeGen/AArch64/arm64-aapcs.ll
===================================================================
--- test/CodeGen/AArch64/arm64-aapcs.ll
+++ test/CodeGen/AArch64/arm64-aapcs.ll
@@ -5,20 +5,20 @@
 ; CHECK-LABEL: @test_i128_align
 define i128 @test_i128_align(i32, i128 %arg, i32 %after) {
   store i32 %after, i32* @var, align 4
-; CHECK: str w4, [{{x[0-9]+}}, :lo12:var]
+; CHECK-DAG: str w4, [{{x[0-9]+}}, :lo12:var]

   ret i128 %arg
-; CHECK: mov x0, x2
-; CHECK: mov x1, x3
+; CHECK-DAG: mov x0, x2
+; CHECK-DAG: mov x1, x3
 }

 ; CHECK-LABEL: @test_i64x2_align
 define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) {
   store i32 %after, i32* @var, align 4
-; CHECK: str w3, [{{x[0-9]+}}, :lo12:var]
+; CHECK-DAG: str w3, [{{x[0-9]+}}, :lo12:var]

   ret [2 x i64] %arg
-; CHECK: mov x0, x1
+; CHECK-DAG: mov x0, x1
 ; CHECK: mov x1, x2
 }
Index: test/CodeGen/AArch64/func-argpassing.ll
===================================================================
--- test/CodeGen/AArch64/func-argpassing.ll
+++ test/CodeGen/AArch64/func-argpassing.ll
@@ -164,11 +164,11 @@
 define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) {
 ; CHECK-LABEL: check_i128_regalign
     store i128 %val1, i128* @var128
-; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK-DAG: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
 ; CHECK-DAG: stp x2, x3, [x[[VAR128]]]

     ret i64 %val2
-; CHECK: mov x0, x4
+; CHECK-DAG: mov x0, x4
 }

 define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
Index: test/CodeGen/AArch64/swifterror.ll
===================================================================
--- test/CodeGen/AArch64/swifterror.ll
+++ test/CodeGen/AArch64/swifterror.ll
@@ -40,11 +40,11 @@
 ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
 ; CHECK-APPLE: mov x21, xzr
 ; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE: cbnz x21
+; CHECK-APPLE: mov x0, x21
+; CHECK-APPLE: cbnz x0
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x21
 ; CHECK-APPLE: bl {{.*}}free

 ; CHECK-O0-LABEL: caller:
@@ -263,11 +263,11 @@
 ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
 ; CHECK-APPLE: mov x21, xzr
 ; CHECK-APPLE: bl {{.*}}foo_sret
-; CHECK-APPLE: cbnz x21
+; CHECK-APPLE: mov x0, x21
+; CHECK-APPLE: cbnz x0
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x21
 ; CHECK-APPLE: bl {{.*}}free

 ; CHECK-O0-LABEL: caller3:
@@ -358,11 +358,11 @@
 ; CHECK-APPLE: mov x21, xzr
 ; CHECK-APPLE: bl {{.*}}foo_vararg
-; CHECK-APPLE: cbnz x21
+; CHECK-APPLE: mov x0, x21
+; CHECK-APPLE: cbnz x0
 ; Access part of the error object and save it to
error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free entry: %error_ptr_ref = alloca swifterror %swift_error* Index: test/CodeGen/AArch64/win64_vararg.ll =================================================================== --- test/CodeGen/AArch64/win64_vararg.ll +++ test/CodeGen/AArch64/win64_vararg.ll @@ -161,25 +161,25 @@ ; CHECK: add x8, x8, #15 ; CHECK: mov x9, sp ; CHECK: and x8, x8, #0x1fffffff0 -; CHECK: sub x20, x9, x8 +; CHECK: sub [[REG:x[0-9]+]], x9, x8 ; CHECK: mov x19, x1 -; CHECK: mov x23, sp +; CHECK: mov [[REG2:x[0-9]+]], sp ; CHECK: stp x6, x7, [x29, #48] ; CHECK: stp x4, x5, [x29, #32] ; CHECK: stp x2, x3, [x29, #16] -; CHECK: mov sp, x20 -; CHECK: ldur x21, [x29, #-40] -; CHECK: sxtw x22, w0 +; CHECK: mov sp, [[REG]] +; CHECK: ldur [[REG3:x[0-9]+]], [x29, #-40] +; CHECK: sxtw [[REG4:x[0-9]+]], w0 ; CHECK: bl __local_stdio_printf_options ; CHECK: ldr x8, [x0] -; CHECK: mov x1, x20 -; CHECK: mov x2, x22 +; CHECK: mov x1, [[REG]] +; CHECK: mov x2, [[REG4]] ; CHECK: mov x3, x19 ; CHECK: orr x0, x8, #0x2 ; CHECK: mov x4, xzr -; CHECK: mov x5, x21 +; CHECK: mov x5, [[REG3]] ; CHECK: bl __stdio_common_vsprintf -; CHECK: mov sp, x23 +; CHECK: mov sp, [[REG2]] ; CHECK: sub sp, x29, #48 ; CHECK: ldp x29, x30, [sp, #48] ; CHECK: ldp x20, x19, [sp, #32] @@ -255,17 +255,15 @@ ; CHECK-LABEL: fixed_params ; CHECK: sub sp, sp, #32 -; CHECK: mov w8, w3 -; CHECK: mov w9, w2 -; CHECK: mov w10, w1 +; CHECK-DAG: mov w6, w3 +; CHECK-DAG: mov [[REG1:w[0-9]+]], w2 +; CHECK: mov w2, w1 ; CHECK: str w4, [sp] ; CHECK: fmov x1, d0 ; CHECK: fmov x3, d1 ; CHECK: fmov x5, d2 ; CHECK: fmov x7, d3 -; CHECK: mov w2, w10 -; CHECK: mov w4, w9 -; CHECK: mov w6, w8 +; CHECK: mov w4, [[REG1]] ; CHECK: str x30, [sp, #16] ; CHECK: str d4, [sp, #8] ; CHECK: bl varargs Index: test/CodeGen/AMDGPU/addrspacecast.ll =================================================================== --- test/CodeGen/AMDGPU/addrspacecast.ll +++ test/CodeGen/AMDGPU/addrspacecast.ll @@ -10,10 +10,10 @@ ; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}} ; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] -; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1 -; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc +; CI-DAG: v_cmp_ne_u32_e64 s[0:1], [[PTR]], -1 +; CI-DAG: v_cndmask_b32_e64 v[[HI:[0-9]+]], 0, [[VAPERTURE]], s[0:1] ; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] +; CI-DAG: v_cndmask_b32_e64 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} @@ -22,17 +22,17 @@ ; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]] ; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base -; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], -1 -; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc +; GFX9: v_cmp_ne_u32_e64 s[0:1], [[PTR]], -1 +; GFX9: v_cndmask_b32_e64 v[[HI:[0-9]+]], 0, [[VAPERTURE]], s[0:1] ; GFX9-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] +; GFX9-DAG: v_cndmask_b32_e64 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]] ; At most 2 digits. Make sure src_shared_base is not counted as a high ; number SGPR. 
-; CI: NumSgprs: {{[0-9][0-9]+}} +; CI: NumSgprs: {{[0-9][0-9]?}} ; GFX9: NumSgprs: {{[0-9]+}} define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* @@ -51,10 +51,10 @@ ; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] ; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 -; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], 0 -; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc +; CI-DAG: v_cmp_ne_u32_e64 s[0:1], [[PTR]], 0 +; CI-DAG: v_cndmask_b32_e64 v[[HI:[0-9]+]], 0, [[VAPERTURE]], s[0:1] ; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] +; CI-DAG: v_cndmask_b32_e64 v[[LO:[0-9]+]], 0, [[VPTR]] ; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} ; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(15, 0, 16) @@ -64,14 +64,14 @@ ; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base ; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 -; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], 0 -; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc +; GFX9: v_cmp_ne_u32_e64 s[0:1], [[PTR]], 0 +; GFX9: v_cndmask_b32_e64 v[[HI:[0-9]+]], 0, [[VAPERTURE]], s[0:1] ; GFX9: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] +; GFX9-DAG: v_cndmask_b32_e64 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]] -; CI: NumSgprs: {{[0-9][0-9]+}} +; CI: NumSgprs: {{[0-9][0-9]?}} ; GFX9: NumSgprs: {{[0-9]+}} define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #0 { %stof = addrspacecast i32* %ptr to i32 addrspace(4)* @@ -112,9 +112,9 @@ ; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}} -; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}} +; HSA-DAG: v_cmp_ne_u64_e64 s[0:1], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] -; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] +; HSA-DAG: v_cndmask_b32_e64 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} ; HSA: ds_write_b32 [[CASTPTR]], v[[K]] define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 { @@ -129,9 +129,9 @@ ; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}} -; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}} +; HSA-DAG: v_cmp_ne_u64_e64 s[0:1], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] -; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]] +; HSA-DAG: v_cndmask_b32_e64 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]] ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} ; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 { @@ -269,7 +269,7 @@ ; HSA-LABEL: {{^}}store_flat_scratch: ; CI-DAG: s_mov_b32 flat_scratch_lo, s9 ; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11 -; CI: s_lshr_b32 flat_scratch_hi, [[ADD]], 8 +; CI-DAG: s_lshr_b32 flat_scratch_hi, [[ADD]], 8 ; GFX9: s_add_u32 flat_scratch_lo, s6, s9 ; GFX9: s_addc_u32 flat_scratch_hi, s7, 0 Index: test/CodeGen/AMDGPU/anyext.ll =================================================================== --- test/CodeGen/AMDGPU/anyext.ll +++ test/CodeGen/AMDGPU/anyext.ll @@ -44,8 +44,8 @@ 
; GFX9: global_load_short_d16_hi ; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0x80008000 ; GFX9: v_bfi_b32 v{{[0-9]+}}, v{{[0-9]+}}, 0, v{{[0-9]+}} -; GFX9: v_cmp_eq_f32_e32 -; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc +; GFX9: v_cmp_eq_f32_e64 +; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[0:1] define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 { bb: %tmp = load i16, i16 addrspace(1)* undef, align 2 Index: test/CodeGen/AMDGPU/branch-condition-and.ll =================================================================== --- test/CodeGen/AMDGPU/branch-condition-and.ll +++ test/CodeGen/AMDGPU/branch-condition-and.ll @@ -10,9 +10,9 @@ ; that was not treated correctly. ; ; GCN-LABEL: {{^}}ham: -; GCN-DAG: v_cmp_lt_f32_e64 [[OTHERCC:s\[[0-9]+:[0-9]+\]]], -; GCN-DAG: v_cmp_lt_f32_e32 vcc, -; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[OTHERCC]] +; GCN-DAG: v_cmp_lt_f32_e64 [[OTHERCC:s\[[0-9]+:[0-9]+\]]], 0, v0 +; GCN-DAG: v_cmp_lt_f32_e64 [[CC:s\[[0-9]+:[0-9]+\]]], 0, v1 +; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OTHERCC]], [[CC]] ; GCN: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[AND]] ; GCN: ; mask branch [[BB5:BB[0-9]+_[0-9]+]] Index: test/CodeGen/AMDGPU/branch-relaxation.ll =================================================================== --- test/CodeGen/AMDGPU/branch-relaxation.ll +++ test/CodeGen/AMDGPU/branch-relaxation.ll @@ -139,8 +139,8 @@ ; GCN-LABEL: {{^}}min_long_forward_vbranch: ; GCN: buffer_load_dword -; GCN: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} -; GCN: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc +; GCN: v_cmp_ne_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}} +; GCN: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[REG]] ; GCN: v_nop_e64 ; GCN: v_nop_e64 @@ -382,8 +382,8 @@ ; Requires expanding of required skip branch. 
; GCN-LABEL: {{^}}uniform_inside_divergent: -; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}} -; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc +; GCN: v_cmp_gt_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 16, v{{[0-9]+}} +; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[REG]] ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN-NEXT: s_cbranch_execnz [[IF:BB[0-9]+_[0-9]+]] @@ -430,8 +430,8 @@ ; si_mask_branch ; GCN-LABEL: {{^}}analyze_mask_branch: -; GCN: v_cmp_lt_f32_e32 vcc -; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc +; GCN: v_cmp_lt_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]] +; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[REG]] ; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]] ; GCN-NEXT: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop_body @@ -485,13 +485,12 @@ ; GCN-LABEL: {{^}}long_branch_hang: ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6 ; GCN-NEXT: s_cbranch_scc1 {{BB[0-9]+_[0-9]+}} -; GCN-NEXT: s_branch [[LONG_BR_0:BB[0-9]+_[0-9]+]] ; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: +; GCN: s_add ; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-( ; GCN: s_setpc_b64 -; GCN-NEXT: [[LONG_BR_0]]: ; GCN-DAG: v_cmp_lt_i32 ; GCN-DAG: v_cmp_gt_i32 ; GCN: s_cbranch_vccnz Index: test/CodeGen/AMDGPU/callee-special-input-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -208,8 +208,8 @@ ; GCN: enable_sgpr_workgroup_id_z = 0 ; GCN: s_mov_b32 s33, s8 -; GCN: s_mov_b32 s4, s33 -; GCN: s_mov_b32 s6, s7 +; GCN-DAG: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_mov_b32 s32, s33 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 { @@ -223,8 +223,8 @@ ; GCN: enable_sgpr_workgroup_id_z = 1 ; GCN: s_mov_b32 s33, s8 -; GCN: s_mov_b32 s4, s33 -; GCN: s_mov_b32 s6, s7 +; GCN-DAG: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 { call void @use_workgroup_id_z() @@ -396,7 +396,7 @@ ; GCN-DAG: s_mov_b32 s33, s8 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s6, s7 ; GCN-DAG: s_mov_b32 s32, s33 ; GCN: s_swappc_b64 @@ -412,7 +412,7 @@ ; GCN: s_mov_b32 s33, s8 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_mov_b32 s32, s33 Index: test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -220,8 +220,8 @@ ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z: ; GCN: enable_vgpr_workitem_id = 2 -; GCN: v_mov_b32_e32 v0, 0x22b -; GCN: v_mov_b32_e32 v1, v2 +; GCN-DAG: v_mov_b32_e32 v0, 0x22b +; GCN-DAG: v_mov_b32_e32 v1, v2 ; GCN: s_swappc_b64 ; GCN-NOT: v0 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { Index: test/CodeGen/AMDGPU/cf-loop-on-constant.ll =================================================================== --- test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -95,7 +95,7 @@ ; GCN-LABEL: {{^}}loop_arg_0: ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} -; GCN: v_cmp_eq_u32_e32 vcc, 1, +; GCN: v_cmp_eq_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 1, ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]] ; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80 Index: 
test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll =================================================================== --- test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll +++ test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll @@ -5,8 +5,8 @@ ; Produces error after adding an implicit def to v_cndmask_b32 ; GCN-LABEL: {{^}}vcc_shrink_vcc_def: -; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}} -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc +; GCN: v_cmp_eq_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}} +; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[REG]] ; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) { bb0: @@ -33,7 +33,7 @@ ; GCN-LABEL: {{^}}preserve_condition_undef_flag: ; GCN-NOT: vcc -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc +; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[REG:s\[[0-9]+:[0-9]+\]]] ; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) { bb0: Index: test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- test/CodeGen/AMDGPU/collapse-endcf.ll +++ test/CodeGen/AMDGPU/collapse-endcf.ll @@ -4,7 +4,7 @@ ; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]] ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]] ; GCN-NEXT: s_cbranch_execz [[ENDIF]] -; GCN: s_and_b64 exec, exec, vcc +; GCN: s_and_b64 exec, exec, [[REG:s\[[0-9]+:[0-9]+\]]] ; GCN-NEXT: ; mask branch [[ENDIF]] ; GCN-NEXT: {{^BB[0-9_]+}}: ; GCN: store_dword @@ -124,6 +124,7 @@ ; GCN-NEXT: s_cbranch_execz [[THEN_OUTER]] ; GCN-NEXT: {{^BB[0-9_]+}}: ; GCN: store_dword +; GCN: v_cmp_eq_u32_e64 ; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]] ; GCN-NEXT: ; mask branch [[THEN_OUTER_FLOW:BB[0-9_]+]] ; GCN-NEXT: {{^BB[0-9_]+}}: @@ -210,7 +211,7 @@ ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] ; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen -; GCN: s_and_b64 exec, exec, vcc +; GCN: s_and_b64 exec, exec, [[REG:s\[[0-9]+:[0-9]+\]]] ; GCN-NOT: s_or_b64 exec, exec Index: test/CodeGen/AMDGPU/commute-compares.ll =================================================================== --- test/CodeGen/AMDGPU/commute-compares.ll +++ test/CodeGen/AMDGPU/commute-compares.ll @@ -7,7 +7,7 @@ ; -------------------------------------------------------------------------------- ; GCN-LABEL: {{^}}commute_eq_64_i32: -; GCN: v_cmp_eq_u32_e32 vcc, 64, v{{[0-9]+}} +; GCN: v_cmp_eq_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 64, v{{[0-9]+}} define amdgpu_kernel void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -20,7 +20,7 @@ } ; GCN-LABEL: {{^}}commute_ne_64_i32: -; GCN: v_cmp_ne_u32_e32 vcc, 64, v{{[0-9]+}} +; GCN: v_cmp_ne_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 64, v{{[0-9]+}} define amdgpu_kernel void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -35,7 +35,7 @@ ; FIXME: Why isn't this being folded as a constant? 
; GCN-LABEL: {{^}}commute_ne_litk_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039 -; GCN: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, [[K]] +; GCN: v_cmp_ne_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, [[K]] define amdgpu_kernel void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -48,7 +48,7 @@ } ; GCN-LABEL: {{^}}commute_ugt_64_i32: -; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}} +; GCN: v_cmp_lt_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 64, v{{[0-9]+}} define amdgpu_kernel void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -61,7 +61,7 @@ } ; GCN-LABEL: {{^}}commute_uge_64_i32: -; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}} +; GCN: v_cmp_lt_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 63, v{{[0-9]+}} define amdgpu_kernel void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -74,7 +74,7 @@ } ; GCN-LABEL: {{^}}commute_ult_64_i32: -; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}} +; GCN: v_cmp_gt_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 64, v{{[0-9]+}} define amdgpu_kernel void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -87,7 +87,7 @@ } ; GCN-LABEL: {{^}}commute_ule_63_i32: -; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}} +; GCN: v_cmp_gt_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 64, v{{[0-9]+}} define amdgpu_kernel void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -101,7 +101,7 @@ ; GCN-LABEL: {{^}}commute_ule_64_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}} -; GCN: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, [[K]] +; GCN: v_cmp_lt_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, [[K]] define amdgpu_kernel void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -114,7 +114,7 @@ } ; GCN-LABEL: {{^}}commute_sgt_neg1_i32: -; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}} +; GCN: v_cmp_lt_i32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], -1, v{{[0-9]+}} define amdgpu_kernel void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -127,7 +127,7 @@ } ; GCN-LABEL: {{^}}commute_sge_neg2_i32: -; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}} +; GCN: v_cmp_lt_i32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], -3, v{{[0-9]+}} define amdgpu_kernel void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -140,7 +140,7 @@ } ; GCN-LABEL: {{^}}commute_slt_neg16_i32: -; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}} +; GCN: v_cmp_gt_i32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], -16, v{{[0-9]+}} define amdgpu_kernel void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -153,7 +153,7 @@ } ; 
GCN-LABEL: {{^}}commute_sle_5_i32: -; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}} +; GCN: v_cmp_gt_i32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 6, v{{[0-9]+}} define amdgpu_kernel void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -170,7 +170,7 @@ ; -------------------------------------------------------------------------------- ; GCN-LABEL: {{^}}commute_eq_64_i64: -; GCN: v_cmp_eq_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_eq_u64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 64, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -183,7 +183,7 @@ } ; GCN-LABEL: {{^}}commute_ne_64_i64: -; GCN: v_cmp_ne_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_ne_u64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 64, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -196,7 +196,7 @@ } ; GCN-LABEL: {{^}}commute_ugt_64_i64: -; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_lt_u64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 64, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -209,7 +209,7 @@ } ; GCN-LABEL: {{^}}commute_uge_64_i64: -; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_lt_u64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 63, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -222,7 +222,7 @@ } ; GCN-LABEL: {{^}}commute_ult_64_i64: -; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_gt_u64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 64, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -235,7 +235,7 @@ } ; GCN-LABEL: {{^}}commute_ule_63_i64: -; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_gt_u64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 64, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -251,7 +251,7 @@ ; GCN-LABEL: {{^}}commute_ule_64_i64: ; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}} -; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_gt_u64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -264,7 +264,7 @@ } ; GCN-LABEL: {{^}}commute_sgt_neg1_i64: -; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_lt_i64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], -1, v{{\[[0-9]+:[0-9]+\]}} 
define amdgpu_kernel void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -277,7 +277,7 @@ } ; GCN-LABEL: {{^}}commute_sge_neg2_i64: -; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_lt_i64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], -3, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -290,7 +290,7 @@ } ; GCN-LABEL: {{^}}commute_slt_neg16_i64: -; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_gt_i64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], -16, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -303,7 +303,7 @@ } ; GCN-LABEL: {{^}}commute_sle_5_i64: -; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_gt_i64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 6, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -321,7 +321,7 @@ ; GCN-LABEL: {{^}}commute_oeq_2.0_f32: -; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_eq_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -335,7 +335,7 @@ ; GCN-LABEL: {{^}}commute_ogt_2.0_f32: -; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_lt_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -348,7 +348,7 @@ } ; GCN-LABEL: {{^}}commute_oge_2.0_f32: -; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_le_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -361,7 +361,7 @@ } ; GCN-LABEL: {{^}}commute_olt_2.0_f32: -; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_gt_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -374,7 +374,7 @@ } ; GCN-LABEL: {{^}}commute_ole_2.0_f32: -; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_ge_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -387,7 +387,7 @@ } ; GCN-LABEL: {{^}}commute_one_2.0_f32: -; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_lg_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, 
v{{[0-9]+}} define amdgpu_kernel void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -400,7 +400,7 @@ } ; GCN-LABEL: {{^}}commute_ord_2.0_f32: -; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]] +; GCN: v_cmp_o_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], [[REG:v[0-9]+]], [[REG]] define amdgpu_kernel void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -413,7 +413,7 @@ } ; GCN-LABEL: {{^}}commute_ueq_2.0_f32: -; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_nlg_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -426,7 +426,7 @@ } ; GCN-LABEL: {{^}}commute_ugt_2.0_f32: -; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_nge_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -439,7 +439,7 @@ } ; GCN-LABEL: {{^}}commute_uge_2.0_f32: -; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_ngt_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -452,7 +452,7 @@ } ; GCN-LABEL: {{^}}commute_ult_2.0_f32: -; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_nle_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -465,7 +465,7 @@ } ; GCN-LABEL: {{^}}commute_ule_2.0_f32: -; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_nlt_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -478,7 +478,7 @@ } ; GCN-LABEL: {{^}}commute_une_2.0_f32: -; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}} +; GCN: v_cmp_neq_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{[0-9]+}} define amdgpu_kernel void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -491,7 +491,7 @@ } ; GCN-LABEL: {{^}}commute_uno_2.0_f32: -; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]] +; GCN: v_cmp_u_f32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], [[REG:v[0-9]+]], [[REG]] define amdgpu_kernel void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -509,7 +509,7 @@ ; GCN-LABEL: {{^}}commute_oeq_2.0_f64: -; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_eq_f64_e64 
[[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -523,7 +523,7 @@ ; GCN-LABEL: {{^}}commute_ogt_2.0_f64: -; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_lt_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -536,7 +536,7 @@ } ; GCN-LABEL: {{^}}commute_oge_2.0_f64: -; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_le_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -549,7 +549,7 @@ } ; GCN-LABEL: {{^}}commute_olt_2.0_f64: -; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_gt_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -562,7 +562,7 @@ } ; GCN-LABEL: {{^}}commute_ole_2.0_f64: -; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_ge_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -575,7 +575,7 @@ } ; GCN-LABEL: {{^}}commute_one_2.0_f64: -; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_lg_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -588,7 +588,7 @@ } ; GCN-LABEL: {{^}}commute_ord_2.0_f64: -; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]] +; GCN: v_cmp_o_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]] define amdgpu_kernel void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -601,7 +601,7 @@ } ; GCN-LABEL: {{^}}commute_ueq_2.0_f64: -; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_nlg_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -614,7 +614,7 @@ } ; GCN-LABEL: {{^}}commute_ugt_2.0_f64: -; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_nge_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() 
#0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -627,7 +627,7 @@ } ; GCN-LABEL: {{^}}commute_uge_2.0_f64: -; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_ngt_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -640,7 +640,7 @@ } ; GCN-LABEL: {{^}}commute_ult_2.0_f64: -; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_nle_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -653,7 +653,7 @@ } ; GCN-LABEL: {{^}}commute_ule_2.0_f64: -; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_nlt_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -666,7 +666,7 @@ } ; GCN-LABEL: {{^}}commute_une_2.0_f64: -; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}} +; GCN: v_cmp_neq_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 2.0, v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -679,7 +679,7 @@ } ; GCN-LABEL: {{^}}commute_uno_2.0_f64: -; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]] +; GCN: v_cmp_u_f64_e64 [[REG:s\[[0-9]+:[0-9]+\]]], [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]] define amdgpu_kernel void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -700,7 +700,7 @@ ; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}} ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} -; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]] +; GCN: v_cmp_eq_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, [[FI]] define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 { entry: %stack0 = alloca i32 Index: test/CodeGen/AMDGPU/ctlz.ll =================================================================== --- test/CodeGen/AMDGPU/ctlz.ll +++ test/CodeGen/AMDGPU/ctlz.ll @@ -19,9 +19,9 @@ ; FUNC-LABEL: {{^}}s_ctlz_i32: ; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} ; GCN-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]] -; GCN-DAG: v_cmp_ne_u32_e64 vcc, [[VAL]], 0{{$}} +; GCN-DAG: v_cmp_ne_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]] -; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], 32, [[VCTLZ]], vcc +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 32, [[VCTLZ]], [[REG:s\[[0-9]+:[0-9]+\]]] ; GCN: buffer_store_dword [[RESULT]] ; GCN: s_endpgm @@ -36,8 +36,8 @@ ; FUNC-LABEL: {{^}}v_ctlz_i32: ; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]], ; GCN: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]] -; GCN: v_cmp_ne_u32_e32 vcc, 0, [[VAL]] -; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], 32, [[CTLZ]], vcc +; GCN: v_cmp_ne_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 
0, [[VAL]]
+; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 32, [[CTLZ]], [[REG]]
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
@@ -106,10 +106,10 @@
 ; GCN: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]],
 ; SI-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
 ; VI-DAG: v_ffbh_u32_sdwa [[FFBH:v[0-9]+]], [[VAL]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
-; SI: v_cmp_ne_u32_e32 vcc, 0, [[VAL]]
-; VI: v_cmp_ne_u16_e32 vcc, 0, [[VAL]]
+; SI: v_cmp_ne_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; VI: v_cmp_ne_u16_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 32, [[FFBH]], vcc
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 32, [[FFBH]], [[REG:s\[[0-9]+:[0-9]+\]]]
 ; SI: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, 24, [[SELECT]]
 ; VI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, -16, [[SELECT]]
@@ -124,13 +124,13 @@
 ; FUNC-LABEL: {{^}}s_ctlz_i64:
 ; GCN: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; GCN-DAG: v_cmp_eq_u32_e64 vcc, s[[HI]], 0{{$}}
+; GCN-DAG: v_cmp_eq_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], s[[HI]], 0{{$}}
 ; GCN-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
 ; GCN-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
 ; GCN-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
 ; GCN-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[ADD]]
 ; GCN-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
-; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
+; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
 ; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
 ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
 define amdgpu_kernel void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
@@ -149,14 +149,14 @@
 ; FUNC-LABEL: {{^}}v_ctlz_i64:
 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]]
+; GCN-DAG: v_cmp_eq_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
 ; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
-; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], vcc
+; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[REG]]
 ; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
-; GCN-DAG: v_cmp_ne_u32_e32 vcc, 0, [[OR]]
-; GCN-DAG: v_cndmask_b32_e32 v[[CLTZ_LO:[0-9]+]], 64, v[[CTLZ:[0-9]+]], vcc
+; GCN-DAG: v_cmp_ne_u32_e64 [[REG2:s\[[0-9]+:[0-9]+\]]], 0, [[OR]]
+; GCN-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], 64, v[[CTLZ:[0-9]+]], [[REG2]]
 ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI:[0-9]+]]{{\]}}
 define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x()
Index: test/CodeGen/AMDGPU/ctlz_zero_undef.ll
===================================================================
--- test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -99,14 +99,14 @@
 ; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64:
 ; GCN: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; GCN-DAG: v_cmp_eq_u32_e64 vcc, s[[HI]], 0{{$}}
 ; GCN-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
-; GCN-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
 ; GCN-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
-; GCN-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[FFBH_LO]]
-; GCN-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
-; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
+; GCN-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
 ; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
+; GCN-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
+; GCN-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[FFBH_LO]]
+; GCN: v_cmp_eq_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], s[[HI]], 0{{$}}
+; GCN: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]], [[REG]]
 ; GCN: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
 define amdgpu_kernel void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
@@ -124,11 +124,11 @@
 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64:
 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]]
+; GCN-DAG: v_cmp_eq_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
 ; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
-; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
+; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]], [[REG]]
 ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}}
 define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x()
@@ -200,8 +200,8 @@
 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
 ; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
 ; GCN-DAG: v_ffbh_u32_e32 [[RESULT0:v[0-9]+]], [[VAL]]
-; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, [[VAL]]
-; GCN-DAG: v_cndmask_b32_e64 [[RESULT1:v[0-9]+]], 0, 1, vcc
+; GCN-DAG: v_cmp_eq_u32_e64 [[REG:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; GCN-DAG: v_cndmask_b32_e64 [[RESULT1:v[0-9]+]], 0, 1, [[REG]]
 ; GCN-DAG: buffer_store_dword [[RESULT0]]
 ; GCN-DAG: buffer_store_byte [[RESULT1]]
 ; GCN: s_endpgm
Index: test/CodeGen/AMDGPU/fcmp.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fcmp.f16.ll
+++ test/CodeGen/AMDGPU/fcmp.f16.ll
@@ -6,8 +6,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_lt_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_lt_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -31,7 +31,7 @@
 ; SI: v_cvt_f32_f16_e64 v[[A_F32:[0-9]+]], |v[[A_F16]]|
 ; SI: v_cvt_f32_f16_e64 v[[B_F32:[0-9]+]], |v[[B_F16]]|
-; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
+; SI: v_cmp_lt_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
 ; VI: v_cmp_lt_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F16]]|, |v[[B_F16]]|
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
@@ -57,8 +57,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_eq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_eq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -80,8 +80,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_le_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_le_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_le_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_le_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -103,8 +103,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_gt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_gt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_gt_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_gt_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -126,8 +126,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_lg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_lg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_lg_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_lg_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -149,8 +149,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_ge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_ge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_ge_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_ge_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -172,8 +172,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_o_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_o_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_o_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_o_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -195,8 +195,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_u_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_u_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_u_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_u_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -218,8 +218,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_nge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_nge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_nge_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_nge_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -241,8 +241,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_nlg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_nlg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_nlg_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_nlg_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -264,8 +264,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_ngt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_ngt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_ngt_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_ngt_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -287,8 +287,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_nle_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_nle_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_nle_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -310,8 +310,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_neq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_neq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_neq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_neq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -333,8 +333,8 @@
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; SI: v_cmp_nlt_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32]], v[[B_F32]]
+; VI: v_cmp_nlt_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16]], v[[B_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
 ; GCN: buffer_store_dword v[[R_I32]]
 ; GCN: s_endpgm
@@ -352,11 +352,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_lt:
-; SI: v_cmp_lt_f32_e32 vcc,
-; SI: v_cmp_lt_f32_e32 vcc,
+; SI: v_cmp_lt_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
+; SI: v_cmp_lt_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
-; VI: v_cmp_lt_f16_e32 vcc,
-; VI: v_cmp_lt_f16_e32 vcc,
+; VI: v_cmp_lt_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
+; VI: v_cmp_lt_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
 define amdgpu_kernel void @fcmp_v2f16_lt(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -371,11 +371,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_eq
-; SI: v_cmp_eq_f32_e32 vcc,
-; SI: v_cmp_eq_f32_e32 vcc,
+; SI: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
+; SI: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
-; VI: v_cmp_eq_f16_e32 vcc,
-; VI: v_cmp_eq_f16_e32 vcc,
+; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
+; VI: v_cmp_eq_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
 define amdgpu_kernel void @fcmp_v2f16_eq(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -390,10 +390,10 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_le:
-; SI: v_cmp_le_f32_e32 vcc
-; SI: v_cmp_le_f32_e32 vcc
-; VI: v_cmp_le_f16_e32 vcc
-; VI: v_cmp_le_f16_e32 vcc
+; SI: v_cmp_le_f32_e64 {{s\[[0-9]+:[0-9]+\]}}
+; SI: v_cmp_le_f32_e64 {{s\[[0-9]+:[0-9]+\]}}
+; VI: v_cmp_le_f16_e64 {{s\[[0-9]+:[0-9]+\]}}
+; VI: v_cmp_le_f16_e64 {{s\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fcmp_v2f16_le(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -408,11 +408,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_gt:
-; SI: v_cmp_gt_f32_e32 vcc,
-; SI: v_cmp_gt_f32_e32 vcc,
+; SI: v_cmp_gt_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
+; SI: v_cmp_gt_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
-; VI: v_cmp_gt_f16_e32 vcc,
-; VI: v_cmp_gt_f16_e32 vcc,
+; VI: v_cmp_gt_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
+; VI: v_cmp_gt_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
 define amdgpu_kernel void @fcmp_v2f16_gt(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -427,11 +427,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_lg:
-; SI: v_cmp_lg_f32_e32 vcc,
-; SI: v_cmp_lg_f32_e32 vcc,
+; SI: v_cmp_lg_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
+; SI: v_cmp_lg_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
-; VI: v_cmp_lg_f16_e32 vcc,
-; VI: v_cmp_lg_f16_e32 vcc,
+; VI: v_cmp_lg_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
+; VI: v_cmp_lg_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
 define amdgpu_kernel void @fcmp_v2f16_lg(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -446,11 +446,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_ge:
-; SI: v_cmp_ge_f32_e32 vcc,
-; SI: v_cmp_ge_f32_e32 vcc,
+; SI: v_cmp_ge_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
+; SI: v_cmp_ge_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
-; VI: v_cmp_ge_f16_e32 vcc,
-; VI: v_cmp_ge_f16_e32 vcc,
+; VI: v_cmp_ge_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
+; VI: v_cmp_ge_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
 define amdgpu_kernel void @fcmp_v2f16_ge(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -465,11 +465,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_o:
-; SI: v_cmp_o_f32_e32 vcc,
-; SI: v_cmp_o_f32_e32 vcc,
+; SI: v_cmp_o_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
+; SI: v_cmp_o_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
-; VI: v_cmp_o_f16_e32 vcc,
-; VI: v_cmp_o_f16_e32 vcc,
+; VI: v_cmp_o_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
+; VI: v_cmp_o_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
 define amdgpu_kernel void @fcmp_v2f16_o(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -484,11 +484,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_u:
-; SI: v_cmp_u_f32_e32 vcc,
-; SI: v_cmp_u_f32_e32 vcc,
+; SI: v_cmp_u_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
+; SI: v_cmp_u_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
-; VI: v_cmp_u_f16_e32 vcc,
-; VI: v_cmp_u_f16_e32 vcc,
+; VI: v_cmp_u_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
+; VI: v_cmp_u_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
 define amdgpu_kernel void @fcmp_v2f16_u(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -503,11 +503,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_nge
-; SI: v_cmp_nge_f32_e32 vcc,
-; SI: v_cmp_nge_f32_e32 vcc,
+; SI: v_cmp_nge_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
+; SI: v_cmp_nge_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
-; VI: v_cmp_nge_f16_e32 vcc,
-; VI: v_cmp_nge_f16_e32 vcc,
+; VI: v_cmp_nge_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
+; VI: v_cmp_nge_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
 define amdgpu_kernel void @fcmp_v2f16_nge(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -522,11 +522,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_nlg
-; SI: v_cmp_nlg_f32_e32 vcc
-; SI: v_cmp_nlg_f32_e32 vcc
+; SI: v_cmp_nlg_f32_e64 {{s\[[0-9]+:[0-9]+\]}}
+; SI: v_cmp_nlg_f32_e64 {{s\[[0-9]+:[0-9]+\]}}
-; VI: v_cmp_nlg_f16_e32 vcc
-; VI: v_cmp_nlg_f16_e32 vcc
+; VI: v_cmp_nlg_f16_e64 {{s\[[0-9]+:[0-9]+\]}}
+; VI: v_cmp_nlg_f16_e64 {{s\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fcmp_v2f16_nlg(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -541,11 +541,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_ngt
-; SI: v_cmp_ngt_f32_e32 vcc,
-; SI: v_cmp_ngt_f32_e32 vcc,
+; SI: v_cmp_ngt_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
+; SI: v_cmp_ngt_f32_e64 {{s\[[0-9]+:[0-9]+\]}},
-; VI: v_cmp_ngt_f16_e32 vcc,
-; VI: v_cmp_ngt_f16_e32 vcc,
+; VI: v_cmp_ngt_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
+; VI: v_cmp_ngt_f16_e64 {{s\[[0-9]+:[0-9]+\]}},
 define amdgpu_kernel void @fcmp_v2f16_ngt(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -560,11 +560,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_nle
-; SI: v_cmp_nle_f32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-; SI: v_cmp_nle_f32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_nle_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_nle_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
-; VI: v_cmp_nle_f16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-; VI: v_cmp_nle_f16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_cmp_nle_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_cmp_nle_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @fcmp_v2f16_nle(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -579,11 +579,11 @@
 }
 ; GCN-LABEL: {{^}}fcmp_v2f16_neq
-; SI: v_cmp_neq_f32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-; SI: v_cmp_neq_f32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_neq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cmp_neq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
-; VI: v_cmp_neq_f16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-; VI: v_cmp_neq_f16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_cmp_neq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_cmp_neq_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @fcmp_v2f16_neq(
     <2 x i32> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
@@ -603,16 +603,16 @@
 ; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
 ; GCN-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
 ; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
-; SI-DAG: v_cmp_nlt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
+; SI-DAG: v_cmp_nlt_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32_0]], v[[B_F32_0]]
 ; GCN-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
 ; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
 ; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
-; SI-DAG: v_cmp_nlt_f32_e32 vcc, v[[A_F32_1]], v[[B_F32_1]]
-; VI-DAG: v_cmp_nlt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
+; SI-DAG: v_cmp_nlt_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F32_1]], v[[B_F32_1]]
+; VI-DAG: v_cmp_nlt_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_V2_F16]], v[[B_V2_F16]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
-; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16_1]], v[[B_F16_1]]
+; VI: v_cmp_nlt_f16_e64 {{s\[[0-9]+:[0-9]+\]}}, v[[A_F16_1]], v[[B_F16_1]]
 ; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
 ; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
 ; GCN: s_endpgm
Index: test/CodeGen/SystemZ/call-03.ll
===================================================================
--- test/CodeGen/SystemZ/call-03.ll
+++ test/CodeGen/SystemZ/call-03.ll
@@ -62,16 +62,13 @@
 ; Check an indirect call. In this case the only acceptable choice for
 ; the target register is %r1.
-;
-; NOTE: the extra copy 'lgr %r1, %r0' is a coalescing failure.
 define void @f5(void(i32, i32, i32, i32) *%foo) {
 ; CHECK-LABEL: f5:
-; CHECK: lgr %r0, %r2
+; CHECK: lgr %r1, %r2
 ; CHECK-DAG: lhi %r2, 1
 ; CHECK-DAG: lhi %r3, 2
 ; CHECK-DAG: lhi %r4, 3
 ; CHECK-DAG: lhi %r5, 4
-; CHECK: lgr %r1, %r0
 ; CHECK: br %r1
   tail call void %foo(i32 1, i32 2, i32 3, i32 4)
   ret void
Index: test/CodeGen/SystemZ/swift-return.ll
===================================================================
--- test/CodeGen/SystemZ/swift-return.ll
+++ test/CodeGen/SystemZ/swift-return.ll
@@ -39,9 +39,8 @@
 ; in memory. The caller provides space for the return value and passes
 ; the address in %r2. The first input argument will be in %r3.
 ; CHECK-LABEL: test2:
-; CHECK: lr %[[REG1:r[0-9]+]], %r2
+; CHECK: lr %r3, %r2
 ; CHECK-DAG: la %r2, 160(%r15)
-; CHECK-DAG: lr %r3, %[[REG1]]
 ; CHECK: brasl %r14, gen2
 ; CHECK: l %r2, 160(%r15)
 ; CHECK: a %r2, 164(%r15)
Index: test/CodeGen/SystemZ/swifterror.ll
===================================================================
--- test/CodeGen/SystemZ/swifterror.ll
+++ test/CodeGen/SystemZ/swifterror.ll
@@ -34,11 +34,11 @@
 ; CHECK: lgr %r[[REG1:[0-9]+]], %r2
 ; CHECK: lghi %r9, 0
 ; CHECK: brasl %r14, foo
-; CHECK: cgijlh %r9, 0,
+; CHECK: ltgr %r2, %r9
+; CHECK: jlh
 ; Access part of the error object and save it to error_ref
-; CHECK: lb %r[[REG2:[0-9]+]], 8(%r9)
+; CHECK: lb %r[[REG2:[0-9]+]], 8(%r2)
 ; CHECK: stc %r[[REG2]], 0(%r[[REG1]])
-; CHECK: lgr %r2, %r9
 ; CHECK: brasl %r14, free
 ; CHECK-O0-LABEL: caller:
 ; CHECK-O0: lghi %r9, 0
@@ -246,11 +246,10 @@
 ; CHECK: lhi %r3, 1
 ; CHECK: lghi %r9, 0
 ; CHECK: brasl %r14, foo_sret
-; CHECK: cgijlh %r9, 0,
+; CHECK: jlh
 ; Access part of the error object and save it to error_ref
-; CHECK: lb %r0, 8(%r9)
+; CHECK: lb %r0, 8(%r2)
 ; CHECK: stc %r0, 0(%r[[REG1]])
-; CHECK: lgr %r2, %r9
 ; CHECK: brasl %r14, free
 ; CHECK-O0-LABEL: caller3:
@@ -296,21 +295,21 @@
 ; The first swifterror value:
 ; CHECK: lghi %r9, 0
 ; CHECK: brasl %r14, foo
-; CHECK: cgijlh %r9, 0,
+; CHECK: ltgr %r2, %r9
+; CHECK: jlh
 ; Access part of the error object and save it to error_ref
-; CHECK: lb %r0, 8(%r9)
+; CHECK: lb %r0, 8(%r2)
 ; CHECK: stc %r0, 0(%r[[REG1]])
-; CHECK: lgr %r2, %r9
 ; CHECK: brasl %r14, free
 ; The second swifterror value:
 ; CHECK: lghi %r9, 0
 ; CHECK: brasl %r14, foo
-; CHECK: cgijlh %r9, 0,
+; CHECK: ltgr %r2, %r9
+; CHECK: jlh
 ; Access part of the error object and save it to error_ref
-; CHECK: lb %r0, 8(%r9)
+; CHECK: lb %r0, 8(%r2)
 ; CHECK: stc %r0, 0(%r[[REG2]])
-; CHECK: lgr %r2, %r9
 ; CHECK: brasl %r14, free
 ; CHECK-O0-LABEL: caller_with_multiple_swifterror_values:
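A note on the FileCheck idiom the AMDGPU updates above lean on: a pattern of the form [[REG:s\[[0-9]+:[0-9]+\]]] binds the variable REG to whatever SGPR pair the compare actually writes, and a later bare [[REG]] must match that identical text. The tests therefore stay agnostic about which registers the allocator picks while still verifying that the v_cndmask consumes the compare's result. A minimal, self-contained sketch of the idiom follows; the function, RUN line, and exact instruction forms are illustrative only and not taken from this patch (in particular, whether the _e32 or _e64 encoding is emitted depends on which register receives the compare result):

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

; [[CND:...]] defines the variable on its first match; the plain [[CND]]
; below requires the exact same text, so the compare's destination and the
; select's condition operand must agree under any register allocation.
; CHECK-LABEL: {{^}}cmp_select_sketch:
; CHECK: v_cmp_lt_f32_e64 [[CND:s\[[0-9]+:[0-9]+\]]],
; CHECK: v_cndmask_b32_e64 v{{[0-9]+}}, {{.*}}, [[CND]]
define amdgpu_kernel void @cmp_select_sketch(float addrspace(1)* %out, float %a, float %b) {
  %cmp = fcmp olt float %a, %b
  %sel = select i1 %cmp, float %a, float %b
  store float %sel, float addrspace(1)* %out
  ret void
}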
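For the call-03.ll change, the removed NOTE documented the old behavior: without a hint the allocator parked the incoming function pointer in %r0 and then needed a second copy into %r1 just before the branch. With %r1 hinted for the indirect-branch operand, the detour disappears. Reconstructing the two schedules directly from the CHECK lines above (the elided middle is just the remaining argument setup):

        # Before: %r2 is saved in %r0, then copied again into %r1.
        lgr     %r0, %r2
        lhi     %r2, 1
        ...
        lgr     %r1, %r0
        br      %r1

        # After: one copy, straight into the hinted register.
        lgr     %r1, %r2
        lhi     %r2, 1
        ...
        br      %r1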
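The swifterror.ll changes follow from the same hints. Previously the error value stayed in %r9 across the null check, so the check used the fused compare-immediate-and-branch cgijlh, and the copy into %r2 (the argument register for the call to free) was a separate lgr afterwards. With the copy hinted to %r2 up front, the copy and the zero test merge into a single load-and-test, followed by a plain condition-code branch. A rough sketch, with a hypothetical label and the surrounding code omitted (this is not verbatim test output):

        # Before: fused compare-and-branch on %r9, with a separate copy
        # of the error register into %r2 later on.
        cgijlh  %r9, 0, .Ltarget   # compare %r9 with 0, branch if unequal
        ...
        lgr     %r2, %r9           # set up %r2 for the call to free

        # After: the hinted copy folds into a load-and-test.
        ltgr    %r2, %r9           # %r2 = %r9, condition code set from the value
        jlh     .Ltarget           # branch if nonzero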