diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -596,8 +596,10 @@
   /// at to the index of the load.
   /// \param [in] MemSizeInBits - The number of bits each load should produce.
   ///
-  /// \returns The lowest-index load found and the lowest index on success.
-  Optional<std::pair<GZExtLoad *, int64_t>> findLoadOffsetsForLoadOrCombine(
+  /// \returns On success, a 3-tuple containing the lowest-index load found,
+  /// the lowest index, and the last load in the sequence.
+  Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
+  findLoadOffsetsForLoadOrCombine(
       SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
       const SmallVector<Register, 8> &RegsToVisit,
       const unsigned MemSizeInBits);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include <tuple>
 
 #define DEBUG_TYPE "gi-combiner"
 
@@ -3455,7 +3456,7 @@
   return std::make_pair(Load, Shift / MemSizeInBits);
 }
 
-Optional<std::pair<GZExtLoad *, int64_t>>
+Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
 CombinerHelper::findLoadOffsetsForLoadOrCombine(
     SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
     const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
@@ -3478,10 +3479,10 @@
   const MachineMemOperand *MMO = nullptr;
 
   // Earliest instruction-order load in the pattern.
-  MachineInstr *EarliestLoad = nullptr;
+  GZExtLoad *EarliestLoad = nullptr;
 
   // Latest instruction-order load in the pattern.
-  MachineInstr *LatestLoad = nullptr;
+  GZExtLoad *LatestLoad = nullptr;
 
   // Base pointer which every load should share.
   Register BasePtr;
@@ -3586,7 +3587,7 @@
     return None;
   }
 
-  return std::make_pair(LowestIdxLoad, LowestIdx);
+  return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
 }
 
 bool CombinerHelper::matchLoadOrCombine(
@@ -3634,13 +3635,13 @@
   // Also verify that each of these ends up putting a[i] into the same memory
   // offset as a load into a wide type would.
   SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
-  GZExtLoad *LowestIdxLoad;
+  GZExtLoad *LowestIdxLoad, *LatestLoad;
   int64_t LowestIdx;
   auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
       MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
   if (!MaybeLoadInfo)
     return false;
-  std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
+  std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
 
   // We have a bunch of loads being OR'd together. Using the addresses + offsets
   // we found before, check if this corresponds to a big or little endian byte
@@ -3695,6 +3696,7 @@
     return false;
 
   MatchInfo = [=](MachineIRBuilder &MIB) {
+    MIB.setInstrAndDebugLoc(*LatestLoad);
     Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
     MIB.buildLoad(LoadDst, Ptr, *NewMMO);
     if (NeedsBSwap)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
@@ -1397,9 +1397,6 @@
 name:            dont_combine_store_between_different_mbb
 tracksRegLiveness: true
 body:             |
-  ; There is a store between the two loads, hidden away in a different MBB.
-  ; We should not combine here.
-
   ; LITTLE-LABEL: name: dont_combine_store_between_different_mbb
   ; LITTLE: bb.0:
   ; LITTLE: successors: %bb.1(0x80000000)
@@ -1444,6 +1441,9 @@
   ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
   ; BIG: $w1 = COPY %full_load(s32)
   ; BIG: RET_ReallyLR implicit $w1
+  ; There is a store between the two loads, hidden away in a different MBB.
+  ; We should not combine here.
+
   bb.0:
     successors: %bb.1(0x80000000)
 
@@ -1479,8 +1479,6 @@
 name:            different_mbb
 tracksRegLiveness: true
 body:             |
-  ; It should be possible to combine here, but it's not supported right now.
-
   ; LITTLE-LABEL: name: different_mbb
   ; LITTLE: bb.0:
   ; LITTLE: successors: %bb.1(0x80000000)
@@ -1513,6 +1511,8 @@
   ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
   ; BIG: $w1 = COPY %full_load(s32)
   ; BIG: RET_ReallyLR implicit $w1
+  ; It should be possible to combine here, but it's not supported right now.
+
   bb.0:
     successors: %bb.1(0x80000000)
 
@@ -1569,3 +1569,66 @@
     %full_load:_(s32) = G_OR %low_half, %high_half
     $w1 = COPY %full_load(s32)
     RET_ReallyLR implicit $w1
+
+...
+---
+name:            store_between_loads_and_or
+alignment:       4
+tracksRegLiveness: true
+
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; Check that we build the G_LOAD at the point of the last load, instead of at the G_OR.
+    ; There could be a G_STORE in between which it may not be safe to move the load across.
+    ; LITTLE-LABEL: name: store_between_loads_and_or
+    ; LITTLE: liveins: $x0, $x1
+    ; LITTLE: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; LITTLE: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; LITTLE: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+    ; LITTLE: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
+    ; LITTLE: $w0 = COPY [[LOAD]](s32)
+    ; LITTLE: RET_ReallyLR implicit $w0
+    ; BIG-LABEL: name: store_between_loads_and_or
+    ; BIG: liveins: $x0, $x1
+    ; BIG: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; BIG: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; BIG: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+    ; BIG: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[LOAD]]
+    ; BIG: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
+    ; BIG: $w0 = COPY [[BSWAP]](s32)
+    ; BIG: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %12:_(s8) = G_CONSTANT i8 1
+    %15:_(s32) = G_CONSTANT i32 8
+    %19:_(s32) = G_CONSTANT i32 16
+    %23:_(s32) = G_CONSTANT i32 24
+    %13:_(s32) = G_ZEXTLOAD %0:_(p0) :: (load (s8))
+    %3:_(s64) = G_CONSTANT i64 1
+    %4:_(p0) = G_PTR_ADD %0:_, %3:_(s64)
+    %14:_(s32) = G_ZEXTLOAD %4:_(p0) :: (load (s8))
+    %6:_(s64) = G_CONSTANT i64 2
+    %7:_(p0) = G_PTR_ADD %0:_, %6:_(s64)
+    %18:_(s32) = G_ZEXTLOAD %7:_(p0) :: (load (s8))
+    %9:_(s64) = G_CONSTANT i64 3
+    %10:_(p0) = G_PTR_ADD %0:_, %9:_(s64)
+    %22:_(s32) = G_ZEXTLOAD %10:_(p0) :: (load (s8))
+    G_STORE %12:_(s8), %1:_(p0) :: (store (s8))
+    %16:_(s32) = nuw nsw G_SHL %14:_, %15:_(s32)
+    %17:_(s32) = G_OR %16:_, %13:_
+    %20:_(s32) = nuw nsw G_SHL %18:_, %19:_(s32)
+    %21:_(s32) = G_OR %17:_, %20:_
+    %24:_(s32) = nuw G_SHL %22:_, %23:_(s32)
+    %25:_(s32) = G_OR %21:_, %24:_
+    $w0 = COPY %25:_(s32)
+    RET_ReallyLR implicit $w0
+
+...
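
Note: for reference, a minimal C++ sketch (not part of the patch; the function name is illustrative) of the source-level pattern the new store_between_loads_and_or test models. Four adjacent byte loads are OR'd into one 32-bit value, with a store between the loads and the OR tree. Building the wide G_LOAD at the position of the latest narrow load, via MIB.setInstrAndDebugLoc(*LatestLoad), keeps it ahead of the store; building it at the G_OR would sink the load past a store it is not known not to alias.

#include <cstdint>

// Little-endian u32 assembled from four byte loads. The load-or combine
// folds the OR tree into a single 4-byte load (plus a G_BSWAP on
// big-endian targets), emitted where the last byte load was.
std::uint32_t load_then_store(const std::uint8_t *p, std::uint8_t *q) {
  std::uint32_t b0 = p[0];
  std::uint32_t b1 = (std::uint32_t)p[1] << 8;
  std::uint32_t b2 = (std::uint32_t)p[2] << 16;
  std::uint32_t b3 = (std::uint32_t)p[3] << 24;
  *q = 1; // store between the narrow loads and the ORs
  return b0 | b1 | b2 | b3;
}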