Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -700,11 +700,9 @@ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Result.getValueType(), Result, DAG.getValueType(SrcVT)); - else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) - // All the top bits are guaranteed to be zero - inform the optimizers. - Result = DAG.getNode(ISD::AssertZext, dl, - Result.getValueType(), Result, - DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) { + Result = DAG.getZeroExtendInReg(Result, dl, SrcVT); + } Value = Result; Chain = Ch; Index: test/CodeGen/AMDGPU/ctlz.ll =================================================================== --- test/CodeGen/AMDGPU/ctlz.ll +++ test/CodeGen/AMDGPU/ctlz.ll @@ -255,10 +255,14 @@ } ; FIXME: Need to handle non-uniform case for function below (load without gep). +; FIXME: Why is there a difference with VI? 
; FUNC-LABEL: {{^}}v_ctlz_i7_sel_eq_neg1: -; GCN: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]], +; GCN-DAG: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]], +; SI-DAG: s_movk_i32 [[MASK:s[0-9]+]], 0x7f +; VI: v_and_b32_e32 [[VAL]], 0x7f, [[VAL]] ; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]] -; GCN: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]] +; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], [[MASK]], [[FFBH]] +; VI: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]] ; GCN: {{buffer|flat}}_store_byte [[TRUNC]], define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() Index: test/CodeGen/AMDGPU/load-local-i1.ll =================================================================== --- test/CodeGen/AMDGPU/load-local-i1.ll +++ test/CodeGen/AMDGPU/load-local-i1.ll @@ -66,8 +66,9 @@ } ; FUNC-LABEL: {{^}}local_zextload_i1_to_i32: -; GCN: ds_read_u8 -; GCN: ds_write_b32 +; GCN: ds_read_u8 [[LOAD:v[0-9]+]] +; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 1, [[LOAD]] +; GCN: ds_write_b32 v{{[0-9]+}}, [[AND]] define amdgpu_kernel void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 { %a = load i1, i1 addrspace(3)* %in %ext = zext i1 %a to i32 Index: test/CodeGen/AMDGPU/udiv.ll =================================================================== --- test/CodeGen/AMDGPU/udiv.ll +++ test/CodeGen/AMDGPU/udiv.ll @@ -128,8 +128,11 @@ } ; FUNC-LABEL: {{^}}v_udiv_i23: +; SI: s_mov_b32 [[MASK:s[0-9]+]], 0x7fffff +; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], +; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], ; SI: v_rcp_f32 -; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7fffff, v{{[0-9]+}} +; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], [[MASK]], v{{[0-9]+}} ; SI: buffer_store_dword [[TRUNC]] define amdgpu_kernel void @v_udiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)* %in) { %den_ptr = getelementptr i23, i23 addrspace(1)* %in, i23 1