Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1551,7 +1551,9 @@
       // ds_write2_b32 depending on the alignment. In either case with either
       // alignment there is no faster way of doing this.
       if (IsFast)
-        *IsFast = 1;
+        *IsFast = (Alignment >= RequiredAlignment) ? 64
+                  : (Alignment < Align(4))         ? 32
+                                                   : 1;
 
       return true;
     }
@@ -1570,7 +1572,9 @@
       // be more of them, so overall we will pay less penalty issuing a single
       // instruction.
       if (IsFast)
-        *IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
+        *IsFast = (Alignment >= RequiredAlignment) ? 96
+                  : (Alignment < Align(4))         ? 32
+                                                   : 1;
 
       return true;
     }
@@ -1591,7 +1595,9 @@
       // will be more of them, so overall we will pay less penalty issuing a
       // single instruction.
       if (IsFast)
-        *IsFast= Alignment >= RequiredAlignment || Alignment < Align(4);
+        *IsFast = (Alignment >= RequiredAlignment) ? 128
+                  : (Alignment < Align(4))         ? 32
+                                                   : 1;
 
       return true;
     }
@@ -1604,7 +1610,7 @@
     }
 
     if (IsFast)
-      *IsFast = Alignment >= RequiredAlignment;
+      *IsFast = (Alignment >= RequiredAlignment) ? Size : 0;
 
     return Alignment >= RequiredAlignment ||
            Subtarget->hasUnalignedDSAccessEnabled();
@@ -1662,22 +1668,8 @@
 bool SITargetLowering::allowsMisalignedMemoryAccesses(
     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
     unsigned *IsFast) const {
-  bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
-                                                  Alignment, Flags, IsFast);
-
-  if (Allow && IsFast && Subtarget->hasUnalignedDSAccessEnabled() &&
-      (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
-       AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
-    // Lie it is fast if +unaligned-access-mode is passed so that DS accesses
-    // get vectorized. We could use ds_read2_b*/ds_write2_b* instructions on a
-    // misaligned data which is faster than a pair of ds_read_b*/ds_write_b*
-    // which would be equally misaligned.
-    // This is only used by the common passes, selection always calls the
-    // allowsMisalignedMemoryAccessesImpl version.
-    *IsFast= 1;
-  }
-
-  return Allow;
+  return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
+                                            Alignment, Flags, IsFast);
 }
 
 EVT SITargetLowering::getOptimalMemOpType(
@@ -8560,7 +8552,7 @@
     auto Flags = Load->getMemOperand()->getFlags();
     if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,
                                            Load->getAlign(), Flags, &Fast) &&
-        Fast)
+        Fast > 1)
       return SDValue();
 
     if (MemVT.isVector())
@@ -9059,7 +9051,7 @@
     auto Flags = Store->getMemOperand()->getFlags();
     if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,
                                            Store->getAlign(), Flags, &Fast) &&
-        Fast)
+        Fast > 1)
      return SDValue();
 
     if (VT.isVector())