Index: llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -179,16 +179,6 @@ } } -static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) { - switch (Reason) { - default: - return Reason; - case GenericSchedulerBase::RegCritical: - case GenericSchedulerBase::RegExcess: - return -Reason; - } -} - // This function is mostly cut and pasted from // GenericScheduler::pickNodeBidirectional() SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { @@ -261,9 +251,7 @@ } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) { Cand = BotCand; } else { - int TopRank = getBidirectionalReasonRank(TopCand.Reason); - int BotRank = getBidirectionalReasonRank(BotCand.Reason); - if (TopRank > BotRank) { + if (BotCand.Reason > TopCand.Reason) { Cand = TopCand; } else { Cand = BotCand; Index: llvm/trunk/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll +++ llvm/trunk/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll @@ -120,15 +120,16 @@ ; GCN-LABEL: {{^}}sink_ubfe_i16: ; GCN-NOT: lshr -; VI: s_bfe_u32 s0, s0, 0xc0004 +; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c +; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004 ; GCN: s_cbranch_scc1 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004 -; VI: s_and_b32 s0, s0, 0xff +; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff ; GCN: BB2_2: ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004 -; VI: s_and_b32 s0, s0, 0x7f +; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f ; GCN: BB2_3: ; GCN: buffer_store_short Index: llvm/trunk/test/CodeGen/AMDGPU/clamp.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/clamp.ll +++ llvm/trunk/test/CodeGen/AMDGPU/clamp.ll @@ -51,8 +51,8 @@ } ; GCN-LABEL: {{^}}v_clamp_negzero_f32: -; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] -; GCN: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1 +; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN-DAG: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0 define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() Index: llvm/trunk/test/CodeGen/AMDGPU/ds_read2st64.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/ds_read2st64.ll +++ llvm/trunk/test/CodeGen/AMDGPU/ds_read2st64.ll @@ -197,8 +197,8 @@ ; SI-LABEL: @simple_read2st64_f64_over_max_offset ; SI-NOT: ds_read2st64_b64 -; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 -; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}} +; SI-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 +; SI-DAG: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}} ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { Index: llvm/trunk/test/CodeGen/AMDGPU/fcopysign.f16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fcopysign.f16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/fcopysign.f16.ll @@ -10,11 +10,11 @@ declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) ; GCN-LABEL: {{^}}test_copysign_f16: -; SI: buffer_load_ushort v[[MAG:[0-9]+]] ; SI: buffer_load_ushort v[[SIGN:[0-9]+]] +; SI: buffer_load_ushort v[[MAG:[0-9]+]] ; SI: s_brev_b32 s[[CONST:[0-9]+]], -2 -; SI: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]] -; SI: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]] +; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]] +; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]] ; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_F32]] ; SI: v_cvt_f16_f32_e32 v[[OUT:[0-9]+]], v[[OUT_F32]] ; VI: buffer_load_ushort v[[SIGN:[0-9]+]] Index: llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll +++ llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll @@ -532,7 +532,7 @@ } ; GCN-LABEL: {{^}}atomic_umin_i32_ret: -; GCN: flat_atomic_umin v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define void @atomic_umin_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: Index: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -189,7 +189,7 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2i16_0: -; GCN: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] ; CIVI: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e7, [[ELT1]] @@ -258,11 +258,11 @@ ; FIXME: fold lshl_or c0, c1, v0 -> or (c0 << c1), v0 ; GCN-LABEL: {{^}}v_insertelement_v2i16_1: -; GCN: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]] -; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3e7 -; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] +; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7 +; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[K]], 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] @@ -295,13 +295,13 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2f16_0: -; GCN: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] ; CIVI: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x4500, [[ELT1]] -; GFX9: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0x4500{{$}} -; GFX9: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VEC]] +; GFX9-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0x4500{{$}} +; GFX9-DAG: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VEC]] ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[ELT1]], 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] @@ -337,11 +337,11 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2f16_1: -; GCN: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]] -; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4500 -; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] +; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500 +; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[K]], 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll @@ -3,9 +3,9 @@ ; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}s_cvt_pkrtz_v2f16_f32: -; GCN: s_load_dword [[X:s[0-9]+]] -; GCN: s_load_dword [[Y:s[0-9]+]] -; GCN: v_mov_b32_e32 [[VY:v[0-9]+]] +; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}} +; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}} +; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]] ; VI: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]] define void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 { Index: llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll @@ -111,11 +111,11 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16_imm_lo: -; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x1234{{$}} +; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234{{$}} ; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]] -; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}} +; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}} ; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] ; GFX9: ; use [[PACKED]] define void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 { @@ -133,10 +133,10 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_lo: -; GFX9: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] ; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], 4.0, [[VAL1]] -; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}} +; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}} ; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] ; GFX9: ; use [[PACKED]] @@ -155,11 +155,11 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16_imm_hi: -; GFX9: flat_load_dword [[VAL0:v[0-9]+]] -; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x1234 +; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]] +; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234 ; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]] -; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x1234 +; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234 ; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] ; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]] @@ -179,10 +179,10 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16_inline_f16imm_hi: -; GFX9: flat_load_dword [[VAL:v[0-9]+]] +; GFX9-DAG: flat_load_dword [[VAL:v[0-9]+]] ; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 1.0 -; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x3c00 +; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3c00 ; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] ; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]] Index: llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll @@ -103,11 +103,11 @@ } ; GCN-LABEL: {{^}}v_pack_v2i16_imm_lo: -; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} +; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} ; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]] -; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}} +; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}} ; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] ; GFX9: ; use [[PACKED]] @@ -144,10 +144,10 @@ } ; GCN-LABEL: {{^}}v_pack_v2i16_imm_hi: -; GFX9: flat_load_dword [[VAL0:v[0-9]+]] +; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]] ; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]] -; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} +; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} ; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[VAL0]] ; GFX9: ; use [[PACKED]] Index: llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll +++ llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -55,11 +55,11 @@ ; GCN-LABEL: {{^}}smrd_valu: ; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x2ee0 +; SI: s_mov_b32 ; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} ; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} ; SI: s_nop 3 ; SI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]] -; SI: s_mov_b32 ; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8 ; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]] Index: llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll +++ llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll @@ -372,9 +372,9 @@ } ; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32: -; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983 ; GCN: buffer_load_dword [[X:v[0-9]+]] ; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc ; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] @@ -390,9 +390,9 @@ } ; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32: -; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983 ; GCN: buffer_load_dword [[X:v[0-9]+]] ; GCN: buffer_load_dword [[Y:v[0-9]+]] +; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983 ; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc ; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc Index: llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll +++ llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll @@ -166,7 +166,7 @@ ; GCN-LABEL: {{^}}uniform_if_else_ret: ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 -; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]] +; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]] ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN: buffer_store_dword [[TWO]] Index: llvm/trunk/test/CodeGen/AMDGPU/v_cndmask.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/v_cndmask.ll +++ llvm/trunk/test/CodeGen/AMDGPU/v_cndmask.ll @@ -4,8 +4,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; GCN-LABEL: {{^}}v_cnd_nan_nosgpr: -; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0 -; GCN: v_cndmask_b32_e32 v{{[0-9]}}, -1, v{{[0-9]+}}, vcc +; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0 +; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]}}, -1, v{{[0-9]+}}, [[COND]] ; GCN-DAG: v{{[0-9]}} ; All nan values are converted to 0xffffffff ; GCN: s_endpgm @@ -105,8 +105,8 @@ ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] ; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] -; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[Z]], vcc +; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0 +; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]] define void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -122,8 +122,8 @@ ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32: ; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN: v_cmp_nlg_f32_e64 vcc, [[X]], 0 -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc +; GCN: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0 +; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]] define void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 Index: llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll +++ llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll @@ -156,7 +156,7 @@ ; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20 ; SI: buffer_store_dword -; SI: v_cmp_ge_i64_e32 [[CMP:s\[[0-9]+:[0-9]+\]|vcc]] +; SI: v_cmp_ge_i64_e{{32|64}} [[CMP:s\[[0-9]+:[0-9]+\]|vcc]] ; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]] ; SI: [[LABEL_FLOW]]: