Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -262,7 +262,7 @@
   unsigned RegSize = Ty.getSizeInBits();
   unsigned MemSize = Query.MMODescrs[0].SizeInBits;
-  unsigned Align = Query.MMODescrs[0].AlignInBits;
+  unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
   unsigned AS = Query.Types[1].getAddressSpace();
 
   // All of these need to be custom lowered to cast the pointer operand.
@@ -305,9 +305,10 @@
   assert(RegSize >= MemSize);
 
-  if (Align < MemSize) {
+  if (AlignBits < MemSize) {
     const SITargetLowering *TLI = ST.getTargetLowering();
-    if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8))
+    if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS,
+                                                 Align(AlignBits / 8)))
       return false;
   }
@@ -954,10 +955,10 @@
       // Split vector extloads.
       unsigned MemSize = Query.MMODescrs[0].SizeInBits;
-      unsigned Align = Query.MMODescrs[0].AlignInBits;
+      unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
 
       if (MemSize < DstTy.getSizeInBits())
-        MemSize = std::max(MemSize, Align);
+        MemSize = std::max(MemSize, AlignBits);
 
       if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
         return true;
@@ -979,9 +980,10 @@
        return true;
       }
 
-      if (Align < MemSize) {
+      if (AlignBits < MemSize) {
        const SITargetLowering *TLI = ST.getTargetLowering();
-        return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8);
+        return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS,
+                                                        Align(AlignBits / 8));
       }
 
       return false;
Index: llvm/lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -255,12 +255,22 @@
                         const SelectionDAG &DAG) const override;
 
   bool allowsMisalignedMemoryAccessesImpl(
-      unsigned Size, unsigned AS, unsigned Align,
+      unsigned Size, unsigned AddrSpace, Align Alignment,
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       bool *IsFast = nullptr) const;
 
   bool allowsMisalignedMemoryAccesses(
-      EVT VT, unsigned AS, unsigned Align,
+      LLT Ty, unsigned AddrSpace, Align Alignment,
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      bool *IsFast = nullptr) const override {
+    if (IsFast)
+      *IsFast = false;
+    return allowsMisalignedMemoryAccessesImpl(Ty.getSizeInBits(), AddrSpace,
+                                              Alignment, Flags, IsFast);
+  }
+
+  bool allowsMisalignedMemoryAccesses(
+      EVT VT, unsigned AS, unsigned Alignment,
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       bool *IsFast = nullptr) const override;
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1384,7 +1384,7 @@
 }
 
 bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
-    unsigned Size, unsigned AddrSpace, unsigned Align,
+    unsigned Size, unsigned AddrSpace, Align Alignment,
     MachineMemOperand::Flags Flags, bool *IsFast) const {
   if (IsFast)
     *IsFast = false;
@@ -1394,7 +1394,7 @@
     // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
     // aligned, 8 byte access in a single operation using ds_read2/write2_b32
    // with adjacent offsets.
-    bool AlignedBy4 = (Align % 4 == 0);
+    bool AlignedBy4 = Alignment >= Align(4);
     if (IsFast)
       *IsFast = AlignedBy4;
@@ -1407,7 +1407,7 @@
   if (!Subtarget->hasUnalignedScratchAccess() &&
       (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
        AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
-    bool AlignedBy4 = Align >= 4;
+    bool AlignedBy4 = Alignment >= Align(4);
     if (IsFast)
       *IsFast = AlignedBy4;
@@ -1422,7 +1422,7 @@
       // 2-byte alignment is worse than 1 unless doing a 2-byte accesss.
       *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
                  AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
-        Align >= 4 : Align != 2;
+        Alignment >= Align(4) : Alignment != Align(2);
    }
 
    return true;
@@ -1438,12 +1438,12 @@
   if (IsFast)
     *IsFast = true;
 
-  return Size >= 32 && Align >= 4;
+  return Size >= 32 && Alignment >= Align(4);
 }
 
 bool SITargetLowering::allowsMisalignedMemoryAccesses(
-    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
-    bool *IsFast) const {
+    EVT VT, unsigned AddrSpace, unsigned Alignment,
+    MachineMemOperand::Flags Flags, bool *IsFast) const {
   if (IsFast)
     *IsFast = false;
@@ -1457,7 +1457,7 @@
   }
 
   return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
-                                            Align, Flags, IsFast);
+                                            Align(Alignment), Flags, IsFast);
 }
 
 EVT SITargetLowering::getOptimalMemOpType(
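
Note on the alignment-type change (illustrative, not part of the patch): llvm::Align, defined in llvm/Support/Alignment.h, stores a power-of-two byte alignment, while the GlobalISel legality query reports alignment in bits. That is why the legalizer variable is renamed to AlignBits and only converted at the TLI boundary with Align(AlignBits / 8). The sketch below is a minimal standalone C++ program (built against LLVM headers; the AlignBits value is a made-up example) showing that the new Align comparisons agree with the raw unsigned checks they replace for the power-of-two values Align can hold.

// Standalone sketch, not from the patch; assumes LLVM headers are available.
#include "llvm/Support/Alignment.h"
#include <cassert>

using llvm::Align;

int main() {
  // Hypothetical MMO alignment in bits, as reported by Query.MMODescrs[0].
  unsigned AlignBits = 32;
  // Convert to a byte alignment the way the patched call sites do.
  Align Alignment(AlignBits / 8); // Align asserts the value is a power of two.

  // Old form: (Align % 4 == 0). New form: Alignment >= Align(4).
  assert((Alignment.value() % 4 == 0) == (Alignment >= Align(4)));

  // Old form: Align != 2. New form: Alignment != Align(2).
  assert((Alignment.value() != 2) == (Alignment != Align(2)));

  return 0;
}

Expressing the checks through Align keeps the power-of-two and byte-unit invariants in the type: the DAG path can keep wrapping its unsigned parameter as Align(Alignment), while the GlobalISel path converts from bits exactly once.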