diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -713,38 +713,6 @@ defm : DSReadPat_mc ; defm : DSReadPat_mc ; -let AddedComplexity = 100 in { - -foreach vt = VReg_64.RegTypes in { -defm : DSReadPat_mc ; -} - -let SubtargetPredicate = isGFX7Plus in { - -foreach vt = VReg_96.RegTypes in { -defm : DSReadPat_mc ; -} - -foreach vt = VReg_128.RegTypes in { -defm : DSReadPat_mc ; -} - -let SubtargetPredicate = HasUnalignedAccessMode in { - -foreach vt = VReg_96.RegTypes in { -defm : DSReadPat_mc ; -} - -foreach vt = VReg_128.RegTypes in { -defm : DSReadPat_mc ; -} - -} // End SubtargetPredicate = HasUnalignedAccessMode - -} // End SubtargetPredicate = isGFX7Plus - -} // End AddedComplexity = 100 - let OtherPredicates = [D16PreservesUnusedBits] in { def : DSReadPat_D16; def : DSReadPat_D16; @@ -870,6 +838,10 @@ let AddedComplexity = 100 in { +foreach vt = VReg_64.RegTypes in { +defm : DSReadPat_mc ; +} + foreach vt = VReg_64.RegTypes in { defm : DSWritePat_mc ; } @@ -877,24 +849,20 @@ let SubtargetPredicate = isGFX7Plus in { foreach vt = VReg_96.RegTypes in { -defm : DSWritePat_mc ; -} - -foreach vt = VReg_128.RegTypes in { -defm : DSWritePat_mc ; +defm : DSReadPat_mc ; } -let SubtargetPredicate = HasUnalignedAccessMode in { - foreach vt = VReg_96.RegTypes in { -defm : DSWritePat_mc ; +defm : DSWritePat_mc ; } foreach vt = VReg_128.RegTypes in { -defm : DSWritePat_mc ; +defm : DSReadPat_mc ; } -} // End SubtargetPredicate = HasUnalignedAccessMode +foreach vt = VReg_128.RegTypes in { +defm : DSWritePat_mc ; +} } // End SubtargetPredicate = isGFX7Plus diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1431,10 +1431,8 @@ return Aligned; } if (Size == 128) { - // ds_read/write_b128 require 16-byte alignment on gfx8 and older, but we - // can do a 8 byte aligned, 16 byte access in a single operation using - // ds_read2/write2_b64. - bool Aligned = Alignment >= Align(8); + // ds_read/write_b128 require 16-byte alignment on gfx8 and older. + bool Aligned = Alignment >= Align(16); if (IsFast) *IsFast = Aligned;