diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -713,38 +713,6 @@ defm : DSReadPat_mc ; defm : DSReadPat_mc ; -let AddedComplexity = 100 in { - -foreach vt = VReg_64.RegTypes in { -defm : DSReadPat_mc ; -} - -let SubtargetPredicate = isGFX7Plus in { - -foreach vt = VReg_96.RegTypes in { -defm : DSReadPat_mc ; -} - -foreach vt = VReg_128.RegTypes in { -defm : DSReadPat_mc ; -} - -let SubtargetPredicate = HasUnalignedAccessMode in { - -foreach vt = VReg_96.RegTypes in { -defm : DSReadPat_mc ; -} - -foreach vt = VReg_128.RegTypes in { -defm : DSReadPat_mc ; -} - -} // End SubtargetPredicate = HasUnalignedAccessMode - -} // End SubtargetPredicate = isGFX7Plus - -} // End AddedComplexity = 100 - let OtherPredicates = [D16PreservesUnusedBits] in { def : DSReadPat_D16; def : DSReadPat_D16; @@ -870,6 +838,10 @@ let AddedComplexity = 100 in { +foreach vt = VReg_64.RegTypes in { +defm : DSReadPat_mc ; +} + foreach vt = VReg_64.RegTypes in { defm : DSWritePat_mc ; } @@ -877,24 +849,20 @@ let SubtargetPredicate = isGFX7Plus in { foreach vt = VReg_96.RegTypes in { -defm : DSWritePat_mc ; -} - -foreach vt = VReg_128.RegTypes in { -defm : DSWritePat_mc ; +defm : DSReadPat_mc ; } -let SubtargetPredicate = HasUnalignedAccessMode in { - foreach vt = VReg_96.RegTypes in { -defm : DSWritePat_mc ; +defm : DSWritePat_mc ; } foreach vt = VReg_128.RegTypes in { -defm : DSWritePat_mc ; +defm : DSReadPat_mc ; } -} // End SubtargetPredicate = HasUnalignedAccessMode +foreach vt = VReg_128.RegTypes in { +defm : DSWritePat_mc ; +} } // End SubtargetPredicate = isGFX7Plus diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll --- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -1034,14 +1034,14 @@ ; ; GFX9-UNALIGNED-LABEL: load_misaligned64_constant_offsets: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-UNALIGNED-NEXT: ds_read_b128 v[0:3], v4 -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-UNALIGNED-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-UNALIGNED-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-UNALIGNED-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] -; GFX9-UNALIGNED-NEXT: s_endpgm +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-UNALIGNED-NEXT: ds_read2_b64 v[0:3], v4 offset1:1 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-UNALIGNED-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-UNALIGNED-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-UNALIGNED-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX9-UNALIGNED-NEXT: s_endpgm %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 %sum = add i64 %val0, %val1 diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll --- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll @@ -837,12 +837,17 @@ ; ; GFX9-UNALIGNED-LABEL: store_misaligned64_constant_offsets: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, v0 -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-UNALIGNED-NEXT: ds_write_b128 v1, v[0:3] -; GFX9-UNALIGNED-NEXT: s_endpgm +; GFX9-UNALIGNED-NEXT: s_movk_i32 s0, 0x7b +; GFX9-UNALIGNED-NEXT: s_mov_b32 s1, 0 +; GFX9-UNALIGNED-NEXT: s_mov_b32 s2, s0 +; GFX9-UNALIGNED-NEXT: s_mov_b32 s3, s1 +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-UNALIGNED-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; GFX9-UNALIGNED-NEXT: s_endpgm store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 ret void @@ -993,18 +998,18 @@ ; ; GFX9-UNALIGNED-LABEL: simple_write2_v4f32_superreg_align4: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x24 -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-UNALIGNED-NEXT: v_lshl_add_u32 v4, v0, 4, s4 -; GFX9-UNALIGNED-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-UNALIGNED-NEXT: ds_write_b128 v4, v[0:3] -; GFX9-UNALIGNED-NEXT: s_endpgm +; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-UNALIGNED-NEXT: v_lshl_add_u32 v4, v0, 4, s4 +; GFX9-UNALIGNED-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-UNALIGNED-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; GFX9-UNALIGNED-NEXT: s_endpgm %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in %val0 = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 4