Index: llvm/include/llvm/IR/PatternMatch.h =================================================================== --- llvm/include/llvm/IR/PatternMatch.h +++ llvm/include/llvm/IR/PatternMatch.h @@ -1052,9 +1052,9 @@ } template -inline BinaryOp_match m_Mul(const LHS &L, - const RHS &R) { - return BinaryOp_match(L, R); +inline BinaryOp_match m_Mul(const LHS &L, + const RHS &R) { + return BinaryOp_match(L, R); } template Index: llvm/test/Analysis/DivergenceAnalysis/AMDGPU/wave-id-computation.ll =================================================================== --- llvm/test/Analysis/DivergenceAnalysis/AMDGPU/wave-id-computation.ll +++ llvm/test/Analysis/DivergenceAnalysis/AMDGPU/wave-id-computation.ll @@ -158,12 +158,11 @@ ret i32 %i15 } -; FIXME: Should be recognized as uniform ; CHECK-LABEL: 'Divergence Analysis' for function 'calculate_wid_x_commute_mul': ; CHECK: DIVERGENT: %i13 = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !2 ; CHECK-NEXT: DIVERGENT: %i14 = add i32 %i12, %i13 -; CHECK-NEXT: DIVERGENT: %i15 = sdiv i32 %i14, 32 -; CHECK-NEXT: DIVERGENT: ret i32 %i15 +; CHECK-NEXT: %i15 = sdiv i32 %i14, 32 +; CHECK-NEXT: ret i32 %i15 define hidden i32 @calculate_wid_x_commute_mul() #0 { bb: %i = tail call i32 @llvm.amdgcn.workgroup.id.x() Index: llvm/test/CodeGen/AMDGPU/wave-id-computation.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/wave-id-computation.ll +++ llvm/test/CodeGen/AMDGPU/wave-id-computation.ll @@ -100,6 +100,8 @@ ; CHECK-NEXT: v_lshrrev_b32_e32 v1, 26, v1 ; CHECK-NEXT: v_add_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_ashrrev_i32_e32 v0, 6, v0 +; CHECK-NEXT: v_readfirstlane_b32 s4, v0 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: %i = tail call i32 @llvm.amdgcn.workgroup.id.x() @@ -186,27 +188,27 @@ ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; CHECK-NEXT: v_lshrrev_b32_e32 v1, 26, v1 ; CHECK-NEXT: v_add_u32_e32 v1, v0, v1 -; CHECK-NEXT: v_and_b32_e32 v2, 0xffffffc0, v1 -; CHECK-NEXT: v_ashrrev_i32_e32 v1, 6, v1 -; CHECK-NEXT: v_sub_u32_e32 v0, v0, v2 -; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v1 -; CHECK-NEXT: v_lshlrev_b64 v[1:2], 2, v[1:2] -; CHECK-NEXT: v_mov_b32_e32 v4, s1 -; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s0, v1 -; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; CHECK-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] -; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v2, vcc -; CHECK-NEXT: v_mov_b32_e32 v2, s3 -; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, s2, v0 -; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc -; CHECK-NEXT: global_load_dword v2, v[3:4], off -; CHECK-NEXT: global_load_dword v5, v[0:1], off -; CHECK-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 6, v1 +; CHECK-NEXT: v_readfirstlane_b32 s4, v2 +; CHECK-NEXT: v_and_b32_e32 v1, 0xffffffc0, v1 +; CHECK-NEXT: s_ashr_i32 s5, s4, 31 +; CHECK-NEXT: v_sub_u32_e32 v0, v0, v1 +; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 +; CHECK-NEXT: v_readfirstlane_b32 s8, v0 +; CHECK-NEXT: s_add_u32 s0, s0, s4 +; CHECK-NEXT: s_addc_u32 s1, s1, s5 +; CHECK-NEXT: s_ashr_i32 s9, s8, 31 +; CHECK-NEXT: s_lshl_b64 s[4:5], s[8:9], 2 +; CHECK-NEXT: s_add_u32 s2, s2, s4 +; CHECK-NEXT: s_addc_u32 s3, s3, s5 +; CHECK-NEXT: s_load_dword s8, s[0:1], 0x0 +; CHECK-NEXT: s_load_dword s9, s[2:3], 0x0 +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_add_u32_e32 v1, v2, v5 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: global_store_dword v0, v1, s[0:1] +; CHECK-NEXT: s_add_i32 s0, s8, s9 +; CHECK-NEXT: v_mov_b32_e32 v1, s0 +; CHECK-NEXT: global_store_dword v0, v1, s[4:5] ; CHECK-NEXT: s_endpgm %id.x = call i32 @llvm.amdgcn.workitem.id.x() %gid.x = call i32 @llvm.amdgcn.workgroup.id.x()