Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3056,18 +3056,34 @@ return Legalized; } - if (isPowerOf2_32(MemTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. + unsigned MemSizeInBits = MemTy.getSizeInBits(); + uint64_t LargeSplitSize, SmallSplitSize; + + if (!isPowerOf2_32(MemSizeInBits)) { + LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits()); + SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize; + } else { + auto &Ctx = MF.getFunction().getContext(); + if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO)) + return UnableToLegalize; // Don't know what we're being asked to do. + + SmallSplitSize = LargeSplitSize = MemSizeInBits / 2; + } // Extend to the next pow-2. If this store was itself the result of lowering, // e.g. an s56 store being broken into s32 + s24, we might have a stored type - // that's wider the stored size. - const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits())); + // that's wider than the stored size. + unsigned AnyExtSize = PowerOf2Ceil(SrcTy.getSizeInBits()); + const LLT NewSrcTy = LLT::scalar(AnyExtSize); + + if (SrcTy.isPointer()) { + const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits()); + SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0); + } + auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg); // Obtain the smaller value by shifting away the larger value. - uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits()); - uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize; auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize); auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt); @@ -3075,9 +3091,8 @@ LLT PtrTy = MRI.getType(PtrReg); auto OffsetCst = MIRBuilder.buildConstant( LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); + MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst); MachineMemOperand *LargeMMO = MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -826,15 +826,14 @@ ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; CI: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY2]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY3]], [[C]](s32) + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) - ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) - ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) + ; CI: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) + ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s48_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -875,15 +874,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY2]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) - ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) + ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s48) = G_TRUNC %1 @@ -919,15 +917,14 @@ ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; CI: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY2]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY3]], [[C]](s32) + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) - ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) - ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; CI: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) + ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-LABEL: name: test_store_global_s48_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -951,15 +948,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY2]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) - ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; GFX9: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) + ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s48) = G_TRUNC %1