Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3370,7 +3370,7 @@ case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; - case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; + case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32; case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32; case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32; case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64; Index: llvm/trunk/test/CodeGen/AMDGPU/add3.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/add3.ll +++ llvm/trunk/test/CodeGen/AMDGPU/add3.ll @@ -131,13 +131,13 @@ ; VI: ; %bb.0: ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; VI-NEXT: v_mul_lo_i32 v1, v0, v3 +; VI-NEXT: v_mul_lo_u32 v1, v0, v3 ; VI-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: add3_multiuse_outer: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_add3_u32 v0, v0, v1, v2 -; GFX9-NEXT: v_mul_lo_i32 v1, v0, v3 +; GFX9-NEXT: v_mul_lo_u32 v1, v0, v3 ; GFX9-NEXT: ; return to shader part epilog %inner = add i32 %a, %b %outer = add i32 %inner, %c Index: llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -60,7 +60,7 @@ ; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6 ; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] -; GCN-NEXT: v_mul_lo_i32 v0, v0, 9 +; GCN-NEXT: v_mul_lo_u32 v0, v0, 9 ; GCN-NOT: v_mov ; GCN: ds_write_b32 v0, v0 define void @func_other_fi_user_i32() #0 { @@ -172,7 +172,7 @@ ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6 ; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], s6, [[SCALED]] -; GCN: v_mul_lo_i32 [[VZ]], [[VZ]], 9 +; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9 ; GCN: ds_write_b32 v0, [[VZ]] define void @func_other_fi_user_non_inline_imm_offset_i32() #0 { %alloca0 = alloca [128 x i32], align 4, addrspace(5) @@ -196,7 +196,7 @@ ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[DIFF]] ; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], [[OFFSET]], [[SCALED]] -; GCN: v_mul_lo_i32 [[VZ]], [[VZ]], 9 +; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9 ; GCN: ds_write_b32 v0, [[VZ]] define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 { %alloca0 = alloca [128 x i32], align 4, addrspace(5) Index: llvm/trunk/test/CodeGen/AMDGPU/idiv-licm.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/idiv-licm.ll +++ llvm/trunk/test/CodeGen/AMDGPU/idiv-licm.ll @@ -6,7 +6,7 @@ ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000, ; GCN: v_cvt_u32_f32_e32 ; GCN-DAG: v_mul_hi_u32 -; GCN-DAG: v_mul_lo_i32 +; GCN-DAG: v_mul_lo_u32 ; GCN-DAG: v_sub_i32_e32 ; GCN-DAG: v_cmp_eq_u32_e64 ; GCN-DAG: v_cndmask_b32_e64 @@ -42,7 +42,7 @@ ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000, ; GCN: v_cvt_u32_f32_e32 ; GCN-DAG: v_mul_hi_u32 -; GCN-DAG: v_mul_lo_i32 +; GCN-DAG: v_mul_lo_u32 ; GCN-DAG: v_sub_i32_e32 ; GCN-DAG: v_cmp_eq_u32_e64 ; GCN-DAG: v_cndmask_b32_e64 @@ -78,7 +78,7 @@ ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000, ; GCN: v_cvt_u32_f32_e32 ; GCN-DAG: v_mul_hi_u32 -; GCN-DAG: v_mul_lo_i32 +; GCN-DAG: v_mul_lo_u32 ; GCN-DAG: v_sub_i32_e32 ; GCN-DAG: v_cmp_eq_u32_e64 ; GCN-DAG: v_cndmask_b32_e64 @@ -114,7 +114,7 @@ ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000, ; GCN: v_cvt_u32_f32_e32 ; GCN-DAG: v_mul_hi_u32 -; GCN-DAG: v_mul_lo_i32 +; GCN-DAG: v_mul_lo_u32 ; GCN-DAG: v_sub_i32_e32 ; GCN-DAG: v_cmp_eq_u32_e64 ; GCN-DAG: v_cndmask_b32_e64 Index: llvm/trunk/test/CodeGen/AMDGPU/mad_64_32.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/mad_64_32.ll +++ llvm/trunk/test/CodeGen/AMDGPU/mad_64_32.ll @@ -4,7 +4,7 @@ ; GCN-LABEL: {{^}}mad_i64_i32_sextops: ; CI: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3] -; SI: v_mul_lo_i32 +; SI: v_mul_lo_u32 ; SI: v_mul_hi_i32 ; SI: v_add_i32 ; SI: v_addc_u32 @@ -19,7 +19,7 @@ ; GCN-LABEL: {{^}}mad_i64_i32_sextops_commute: ; CI: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3] -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_mul_hi_i32 ; SI: v_add_i32 ; SI: v_addc_u32 @@ -34,7 +34,7 @@ ; GCN-LABEL: {{^}}mad_u64_u32_zextops: ; CI: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3] -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_mul_hi_u32 ; SI: v_add_i32 ; SI: v_addc_u32 @@ -49,7 +49,7 @@ ; GCN-LABEL: {{^}}mad_u64_u32_zextops_commute: ; CI: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3] -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_mul_hi_u32 ; SI: v_add_i32 ; SI: v_addc_u32 Index: llvm/trunk/test/CodeGen/AMDGPU/mul.i16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/mul.i16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/mul.i16.ll @@ -27,7 +27,7 @@ ; FIXME: Should emit u16 mul here. Instead it's worse than SI ; GCN-LABEL: {{^}}v_mul_i16_uniform_load: ; SI: v_mul_u32_u24 -; GFX89: v_mul_lo_i32 +; GFX89: v_mul_lo_u32 define amdgpu_kernel void @v_mul_i16_uniform_load( i16 addrspace(1)* %r, i16 addrspace(1)* %a, @@ -41,8 +41,8 @@ } ; GCN-LABEL: {{^}}v_mul_v2i16: -; SI: v_mul_lo_i32 -; SI: v_mul_lo_i32 +; SI: v_mul_lo_u32 +; SI: v_mul_lo_u32 ; VI: v_mul_lo_u16_sdwa ; VI: v_mul_lo_u16_e32 @@ -59,9 +59,9 @@ ; FIXME: Unpack garbage on gfx9 ; GCN-LABEL: {{^}}v_mul_v3i16: -; SI: v_mul_lo_i32 -; SI: v_mul_lo_i32 -; SI: v_mul_lo_i32 +; SI: v_mul_lo_u32 +; SI: v_mul_lo_u32 +; SI: v_mul_lo_u32 ; VI: v_mul_lo_u16 ; VI: v_mul_lo_u16 @@ -77,10 +77,10 @@ } ; GCN-LABEL: {{^}}v_mul_v4i16: -; SI: v_mul_lo_i32 -; SI: v_mul_lo_i32 -; SI: v_mul_lo_i32 -; SI: v_mul_lo_i32 +; SI: v_mul_lo_u32 +; SI: v_mul_lo_u32 +; SI: v_mul_lo_u32 +; SI: v_mul_lo_u32 ; VI: v_mul_lo_u16_sdwa ; VI: v_mul_lo_u16_e32 Index: llvm/trunk/test/CodeGen/AMDGPU/mul.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/mul.ll +++ llvm/trunk/test/CodeGen/AMDGPU/mul.ll @@ -1,6 +1,7 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GFX9_10 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GFX9_10 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=EG,FUNC %s ; mul24 and mad24 are affected @@ -9,8 +10,8 @@ ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; GCN: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -; GCN: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; GCN: v_mul_lo_u32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; GCN: v_mul_lo_u32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define amdgpu_kernel void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 @@ -27,10 +28,10 @@ ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; GCN: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -; GCN: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -; GCN: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -; GCN: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; GCN: v_mul_lo_u32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; GCN: v_mul_lo_u32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; GCN: v_mul_lo_u32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; GCN: v_mul_lo_u32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define amdgpu_kernel void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 @@ -56,7 +57,7 @@ ; FUNC-LABEL: {{^}}v_trunc_i64_mul_to_i32: ; GCN: s_load_dword ; GCN: s_load_dword -; GCN: v_mul_lo_i32 +; GCN: v_mul_lo_u32 ; GCN: buffer_store_dword define amdgpu_kernel void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %a = load i64, i64 addrspace(1)* %aptr, align 8 @@ -85,7 +86,7 @@ ; FUNC-LABEL: {{^}}v_mul64_sext_c: ; EG-DAG: MULLO_INT ; EG-DAG: MULHI_INT -; GCN-DAG: v_mul_lo_i32 +; GCN-DAG: v_mul_lo_u32 ; GCN-DAG: v_mul_hi_i32 ; GCN: s_endpgm define amdgpu_kernel void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -97,7 +98,7 @@ } ; FUNC-LABEL: {{^}}v_mul64_sext_inline_imm: -; GCN-DAG: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, 9 +; GCN-DAG: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, 9 ; GCN-DAG: v_mul_hi_i32 v{{[0-9]+}}, v{{[0-9]+}}, 9 ; GCN: s_endpgm define amdgpu_kernel void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -122,7 +123,7 @@ } ; FUNC-LABEL: {{^}}v_mul_i32: -; GCN: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 %a = load i32, i32 addrspace(1)* %in @@ -140,11 +141,11 @@ ; crash with a 'failed to select' error. ; FUNC-LABEL: {{^}}s_mul_i64: -; GFX9-DAG: s_mul_i32 -; GFX9-DAG: s_mul_hi_u32 -; GFX9-DAG: s_mul_i32 -; GFX9-DAG: s_mul_i32 -; GFX9: s_endpgm +; GFX9_10-DAG: s_mul_i32 +; GFX9_10-DAG: s_mul_hi_u32 +; GFX9_10-DAG: s_mul_i32 +; GFX9_10-DAG: s_mul_i32 +; GFX9_10: s_endpgm define amdgpu_kernel void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { %mul = mul i64 %a, %b store i64 %mul, i64 addrspace(1)* %out, align 8 @@ -152,7 +153,7 @@ } ; FUNC-LABEL: {{^}}v_mul_i64: -; GCN: v_mul_lo_i32 +; GCN: v_mul_lo_u32 define amdgpu_kernel void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) { %a = load i64, i64 addrspace(1)* %aptr, align 8 %b = load i64, i64 addrspace(1)* %bptr, align 8 @@ -250,26 +251,26 @@ ; GCN: {{buffer|flat}}_load_dwordx4 ; GCN: {{buffer|flat}}_load_dwordx4 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_add_i32_e32 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 +; SI-DAG: v_mul_lo_u32 +; SI-DAG: v_mul_lo_u32 +; SI-DAG: v_mul_lo_u32 +; SI-DAG: v_mul_lo_u32 -; VI-DAG: v_mul_lo_i32 +; VI-DAG: v_mul_lo_u32 ; VI-DAG: v_mul_hi_u32 ; VI: v_mad_u64_u32 ; VI: v_mad_u64_u32 Index: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll +++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -1,6 +1,7 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=NOSDWA,GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SDWA,GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDWA,GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GFX89,SDWA,GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_10,SDWA,GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX9_10,SDWA,GCN %s ; GCN-LABEL: {{^}}add_shr_i32: ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}} @@ -9,6 +10,7 @@ ; VI: v_add_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX9: v_add_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10: v_add_nc_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %a = load i32, i32 addrspace(1)* %in, align 4 @@ -25,6 +27,7 @@ ; VI: v_subrev_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX9: v_sub_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10: v_sub_nc_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %a = load i32, i32 addrspace(1)* %in, align 4 %shr = lshr i32 %a, 16 @@ -52,9 +55,10 @@ } ; GCN-LABEL: {{^}}mul_i16: -; NOSDWA: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; NOSDWA: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; NOSDWA-NOT: v_mul_u32_u24_sdwa -; SDWA: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX89: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX10: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; SDWA-NOT: v_mul_u32_u24_sdwa define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) { @@ -78,7 +82,7 @@ ; VI-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_HI:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_LO]], v[[DST_MUL_HI]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) { entry: @@ -104,8 +108,8 @@ ; VI-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; VI-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX9-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) { entry: @@ -137,10 +141,10 @@ ; VI-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; VI-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX9-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX9-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX9-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) { entry: @@ -178,7 +182,7 @@ ; VI-DAG: v_mul_f16_e32 v[[DST_MUL_LO:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL_LO]], v[[DST_MUL_HI]] -; GFX9: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @mul_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) { entry: @@ -202,8 +206,8 @@ ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX9-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX9-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @mul_v4half(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %ina, <4 x half> addrspace(1)* %inb) { entry: @@ -231,10 +235,10 @@ ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX9-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX9-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX9-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX9-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @mul_v8half(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %ina, <8 x half> addrspace(1)* %inb) { entry: @@ -246,9 +250,10 @@ } ; GCN-LABEL: {{^}}mul_i8: -; NOSDWA: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; NOSDWA: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; NOSDWA-NOT: v_mul_u32_u24_sdwa -; SDWA: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX89: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX10: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; SDWA-NOT: v_mul_u32_u24_sdwa define amdgpu_kernel void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) { @@ -272,8 +277,17 @@ ; GFX9-DAG: v_mul_lo_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1 ; GFX9-DAG: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} + +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 + ; GFX9: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX10-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX10-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 + + define amdgpu_kernel void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) { entry: %a = load <2 x i8>, <2 x i8> addrspace(1)* %ina, align 4 @@ -299,6 +313,11 @@ ; GFX9-DAG: v_mul_lo_u16_sdwa ; GFX9-DAG: v_mul_lo_u16_sdwa +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 + define amdgpu_kernel void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) { entry: %a = load <4 x i8>, <4 x i8> addrspace(1)* %ina, align 4 @@ -330,6 +349,15 @@ ; GFX9-DAG: v_mul_lo_u16_sdwa ; GFX9-DAG: v_mul_lo_u16_sdwa +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16_e64 + define amdgpu_kernel void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) { entry: %a = load <8 x i8>, <8 x i8> addrspace(1)* %ina, align 4 @@ -371,8 +399,8 @@ ; VI: v_mac_f16_sdwa v[[DST_MAC:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_lshlrev_b32_e32 v[[DST_SHL:[0-9]+]], 16, v[[DST_MAC]] -; GFX9: v_pk_mul_f16 v[[DST_MUL:[0-9]+]], v{{[0-9]+}}, v[[SRC:[0-9]+]] -; GFX9: v_pk_add_f16 v{{[0-9]+}}, v[[DST_MUL]], v[[SRC]] +; GFX9_10: v_pk_mul_f16 v[[DST_MUL:[0-9]+]], v{{[0-9]+}}, v[[SRC:[0-9]+]] +; GFX9_10: v_pk_add_f16 v{{[0-9]+}}, v[[DST_MUL]], v[[SRC]] define amdgpu_kernel void @mac_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) { entry: @@ -394,6 +422,8 @@ ; GFX9: s_mov_b32 s[[IMM:[0-9]+]], 0x141007b ; GFX9: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, s[[IMM]] +; GFX10: v_pk_mul_lo_u16 v{{[0-9]+}}, 0x141007b, v{{[0-9]+}} + define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { entry: %a = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4 @@ -413,8 +443,8 @@ ; VI: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9: v_pk_mul_lo_u16 v[[DST1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} -; GFX9: v_pk_mul_lo_u16 v{{[0-9]+}}, v[[DST1]], v{{[0-9]+}} +; GFX9_10: v_pk_mul_lo_u16 v[[DST1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10: v_pk_mul_lo_u16 v{{[0-9]+}}, v[[DST1]], v{{[0-9]+}} define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) { entry: @@ -431,7 +461,7 @@ ; VI: v_add_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9_10: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @add_bb_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) { entry: @@ -458,12 +488,23 @@ ; NOSDWA-NOT: v_or_b32_sdwa ; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; GFX9_10-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX89-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; +; GFX10-DAG: v_lshrrev_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; ; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} -; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9_10-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX89-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; +; GFX10-DAG: v_lshrrev_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; +; GFX89: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; +; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD define amdgpu_kernel void @pulled_out_test(<8 x i8> addrspace(1)* %sourceA, <8 x i8> addrspace(1)* %destValues) { entry: Index: llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll +++ llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll @@ -41,7 +41,7 @@ ; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]] ; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]] ; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]] -; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MAX]], [[MAX]] +; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]] define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { %val = load i32, i32 addrspace(1)* %src, align 4 %neg = sub i32 0, %val Index: llvm/trunk/test/CodeGen/AMDGPU/srem.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/srem.ll +++ llvm/trunk/test/CodeGen/AMDGPU/srem.ll @@ -21,7 +21,7 @@ ; FUNC-LABEL: {{^}}srem_i32_7: ; SI: s_mov_b32 [[MAGIC:s[0-9]+]], 0x92492493 ; SI: v_mul_hi_i32 {{v[0-9]+}}, {{v[0-9]+}}, [[MAGIC]] -; SI: v_mul_lo_i32 +; SI: v_mul_lo_u32 ; SI: v_sub_{{[iu]}}32 ; SI: s_endpgm define amdgpu_kernel void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { Index: llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll +++ llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll @@ -29,7 +29,7 @@ ; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]] ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]] -; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]] +; SI-DAG: v_mul_lo_u32 [[RCP_LO:v[0-9]+]], [[RCP]] ; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]] ; SI: v_cmp_eq_u32_e64 [[CC1:s\[[0-9:]+\]]], 0, [[RCP_HI]] ; SI: v_cndmask_b32_e64 [[CND1:v[0-9]+]], [[RCP_LO]], [[NEG_RCP_LO]], [[CC1]] @@ -38,7 +38,7 @@ ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]] ; SI: v_cndmask_b32_e64 [[CND2:v[0-9]+]], [[RCP_S_E]], [[RCP_A_E]], [[CC1]] ; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]], [[CND2]], -; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]], [[CND2]] +; SI: v_mul_lo_u32 [[Num_S_Remainder:v[0-9]+]], [[CND2]] ; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]] ; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]] ; SI-DAG: v_cndmask_b32_e64 @@ -113,7 +113,7 @@ ; accurately check all the operands. ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 @@ -121,7 +121,7 @@ ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_add_{{[iu]}}32_e32 @@ -134,7 +134,7 @@ ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 @@ -142,7 +142,7 @@ ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_add_{{[iu]}}32_e32 @@ -260,7 +260,7 @@ ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 @@ -268,7 +268,7 @@ ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_add_{{[iu]}}32_e32 @@ -281,7 +281,7 @@ ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 @@ -289,7 +289,7 @@ ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_add_{{[iu]}}32_e32 @@ -302,7 +302,7 @@ ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 @@ -310,7 +310,7 @@ ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_add_{{[iu]}}32_e32 @@ -323,7 +323,7 @@ ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_mul_lo_i32 +; SI-DAG: v_mul_lo_u32 ; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 Index: llvm/trunk/test/CodeGen/AMDGPU/urem.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/urem.ll +++ llvm/trunk/test/CodeGen/AMDGPU/urem.ll @@ -22,7 +22,7 @@ ; SI: s_mov_b32 [[MAGIC:s[0-9]+]], 0x24924925 ; SI: v_mul_hi_u32 {{v[0-9]+}}, {{v[0-9]+}}, [[MAGIC]] ; SI: v_subrev_{{[iu]}}32 -; SI: v_mul_lo_i32 +; SI: v_mul_lo_u32 ; SI: v_sub_{{[iu]}}32 ; SI: buffer_store_dword ; SI: s_endpgm Index: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll +++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll @@ -92,7 +92,7 @@ ;CHECK-NEXT: ; %main_body ;CHECK-NEXT: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec ;CHECK-NEXT: s_wqm_b64 exec, exec -;CHECK: v_mul_lo_i32 [[MUL:v[0-9]+]], v0, v1 +;CHECK: v_mul_lo_u32 [[MUL:v[0-9]+]], v0, v1 ;CHECK: s_and_b64 exec, exec, [[ORIG]] ;CHECK: store ;CHECK: s_wqm_b64 exec, exec Index: llvm/trunk/test/CodeGen/AMDGPU/wwm-reserved.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/wwm-reserved.ll +++ llvm/trunk/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -82,7 +82,7 @@ define i32 @called(i32 %a) noinline { ; GFX9: v_add_u32_e32 v1, v0, v0 %add = add i32 %a, %a -; GFX9: v_mul_lo_i32 v0, v1, v0 +; GFX9: v_mul_lo_u32 v0, v1, v0 %mul = mul i32 %add, %a ; GFX9: v_sub_u32_e32 v0, v0, v1 %sub = sub i32 %mul, %add