diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2947,15 +2947,17 @@
   if (isPowerOf2_32(MemTy.getSizeInBits()))
     return UnableToLegalize; // Don't know what we're being asked to do.
 
-  // Extend to the next pow-2.
-  const LLT ExtendTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
-  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
+  // Extend to the next pow-2. If this store was itself the result of lowering,
+  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
+  // that's wider than the stored size.
+  const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
+  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
 
   // Obtain the smaller value by shifting away the larger value.
   uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
-  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
-  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
-  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
+  uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
+  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
+  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
 
   // Generate the PtrAdd and truncating stores.
   LLT PtrTy = MRI.getType(PtrReg);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
@@ -1,22 +1,11 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=aarch64 -run-pass=legalizer %s -o - -verify-machineinstrs | FileCheck %s
---- |
-  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-  target triple = "aarch64"
-
-  define i32 @load_store_test(i24* %ptr, i24* %ptr2) {
-    %val = load i24, i24* %ptr
-    store i24 %val, i24* %ptr2
-    ret i32 0
-  }
-
-...
 ---
 name:            load_store_test
 alignment:       4
 tracksRegLiveness: true
 body:             |
-  bb.1 (%ir-block.0):
+  bb.1:
     liveins: $x0, $x1
 
     ; CHECK-LABEL: name: load_store_test
@@ -24,26 +13,60 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.ptr, align 4)
+    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16), align 4)
     ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from %ir.ptr + 2, align 2, basealign 4)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 2, align 2)
     ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s64)
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
     ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s64)
     ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
-    ; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16) into %ir.ptr2, align 4)
-    ; CHECK: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store (s8) into %ir.ptr2 + 2, align 2, basealign 4)
+    ; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16), align 4)
+    ; CHECK: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 2, align 2)
     ; CHECK: $w0 = COPY [[C]](s32)
     ; CHECK: RET_ReallyLR implicit $w0
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %3:_(s32) = G_CONSTANT i32 0
-    %2:_(s24) = G_LOAD %0(p0) :: (load (s24) from %ir.ptr, align 4)
-    G_STORE %2(s24), %1(p0) :: (store (s24) into %ir.ptr2, align 4)
+    %2:_(s24) = G_LOAD %0(p0) :: (load (s24), align 4)
+    G_STORE %2(s24), %1(p0) :: (store (s24), align 4)
     $w0 = COPY %3(s32)
     RET_ReallyLR implicit $w0
 
 ...
+---
+name:            store_i56
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: store_i56
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C1]](s64)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p0) :: (store (s32), align 8)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C3]](s64)
+    ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 4, align 4)
+    ; CHECK: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 6, align 2)
+    ; CHECK: RET_ReallyLR
+    %0:_(p0) = COPY $x0
+    %1:_(s56) = G_CONSTANT i56 32
+    G_STORE %1(s56), %0(p0) :: (store (s56), align 8)
+    RET_ReallyLR
+
+...
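Note (illustrative, not part of the patch): the lowering splits a non-power-of-2 store into a power-of-2 "large half" plus a remainder store, and that remainder may itself be non-power-of-2, so it re-enters the same lowering with a source register that is already wider than the remaining memory size (the s56 case: s32 + s24, where the s24 piece arrives as an s32 value). That is why the split sizes are now computed from MemTy rather than SrcTy, and why buildAnyExt becomes buildAnyExtOrTrunc. The standalone C++ sketch below is not LLVM code; all names in it are made up for illustration, and it only reproduces the size arithmetic to show how an s56 store ends up as s32 + s16 + s8, matching the store_i56 checks.

    // Standalone sketch: size arithmetic for lowering a store whose width is
    // not a power of 2. Each step peels off the largest power-of-2 "large
    // half"; the remainder may itself be non-power-of-2 (56 -> 32 + 24 ->
    // 32 + 16 + 8) and would be lowered again.
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Largest power of 2 that is <= Bits (Bits must be > 0).
    static uint64_t powerOf2Floor(uint64_t Bits) {
      uint64_t P = 1;
      while (P * 2 <= Bits)
        P *= 2;
      return P;
    }

    // Split a store of `Bits` into power-of-2 pieces, mirroring the
    // large-half / small-half recursion described above.
    static std::vector<uint64_t> splitStoreBits(uint64_t Bits) {
      std::vector<uint64_t> Pieces;
      while ((Bits & (Bits - 1)) != 0) {      // while Bits is not a power of 2
        uint64_t Large = powerOf2Floor(Bits); // e.g. 56 -> 32
        Pieces.push_back(Large);
        Bits -= Large;                        // remainder (e.g. 24) is split again
      }
      Pieces.push_back(Bits);
      return Pieces;
    }

    int main() {
      for (uint64_t B : splitStoreBits(56))
        std::cout << 's' << B << ' ';         // prints: s32 s16 s8
      std::cout << '\n';
      return 0;
    }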