diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -666,9 +666,11 @@ m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))), m_Value(ShAmt2)))))) || match(V, m_OneUse(m_Or(m_Value(X), - m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))))))) - foldLoadsRecursive(X, LOps, DL, AA); - else + m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))))))) { + if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot) + // Avoid Partial chain merge. + return false; + } else return false; // Check if the pattern has loads @@ -691,18 +693,6 @@ if (LI1->getParent() != LI2->getParent()) return false; - // Swap loads if LI1 comes later as we handle only forward loads. - // This is done as InstCombine folds lowest node forward loads to reverse. - // The implementation will be subsequently extended to handle all reverse - // loads. - if (!LI1->comesBefore(LI2)) { - if (LOps.FoundRoot == false) { - std::swap(LI1, LI2); - std::swap(ShAmt1, ShAmt2); - } else - return false; - } - // Find the data layout bool IsBigEndian = DL.isBigEndian(); @@ -719,6 +709,16 @@ Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2, /* AllowNonInbounds */ true); + // Make sure Load with lower Offset is at LI1 + bool Reverse = false; + if (Offset2.slt(Offset1)) { + std::swap(LI1, LI2); + std::swap(ShAmt1, ShAmt2); + std::swap(Offset1, Offset2); + std::swap(Load1Ptr, Load2Ptr); + Reverse = true; + } + // Verify if both loads have same base pointers and load sizes are same. uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits(); uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits(); @@ -730,9 +730,13 @@ return false; // Alias Analysis to check for store b/w the loads. 
- MemoryLocation Loc = MemoryLocation::get(LI2); + LoadInst *Start = LI1, *End = LI2; + if (!LI1->comesBefore(LI2)) + std::swap(Start, End); + MemoryLocation Loc = MemoryLocation::get(End); unsigned NumScanned = 0; - for (Instruction &Inst : make_range(LI1->getIterator(), LI2->getIterator())) { + for (Instruction &Inst : + make_range(Start->getIterator(), End->getIterator())) { if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc))) return false; if (++NumScanned > MaxInstrsToScan) @@ -752,9 +756,13 @@ Shift2 = Temp->getZExtValue(); // First load is always LI1. This is where we put the new load. - // Use the merged load size available from LI1, if we already combined loads. - if (LOps.FoundRoot) - LoadSize1 = LOps.LoadSize; + // Use the merged load size available from LI1 for forward loads. + if (LOps.FoundRoot) { + if (!Reverse) + LoadSize1 = LOps.LoadSize; + else + LoadSize2 = LOps.LoadSize; + } // Verify if shift amount and load index aligns and verifies that loads // are consecutive. @@ -769,10 +777,9 @@ AAMDNodes AATags2 = LI2->getAAMetadata(); if (LOps.FoundRoot == false) { LOps.FoundRoot = true; - LOps.LoadSize = LoadSize1 + LoadSize2; AATags1 = LI1->getAAMetadata(); - } else - LOps.LoadSize = LOps.LoadSize + LoadSize2; + } + LOps.LoadSize = LoadSize1 + LoadSize2; // Concatenate the AATags of the Merged Loads. 
LOps.AATags = AATags1.concat(AATags2); diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll --- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll @@ -1220,25 +1220,29 @@ } define i32 @loadCombine_4consecutive_rev(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_rev( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_rev( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_rev( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 
[[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1264,45 +1268,49 @@ } define i64 @loadCombine_8consecutive_rev(ptr %p) { -; ALL-LABEL: @loadCombine_8consecutive_rev( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 -; ALL-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 -; ALL-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 -; ALL-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 -; ALL-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 -; ALL-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1 -; ALL-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 -; ALL-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 -; ALL-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 -; ALL-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64 -; ALL-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64 -; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24 -; 
ALL-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32 -; ALL-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40 -; ALL-NEXT: [[S7:%.*]] = shl i64 [[E7]], 48 -; ALL-NEXT: [[S8:%.*]] = shl i64 [[E8]], 56 -; ALL-NEXT: [[O7:%.*]] = or i64 [[S8]], [[S7]] -; ALL-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]] -; ALL-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] -; ALL-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] -; ALL-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] -; ALL-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] -; ALL-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]] -; ALL-NEXT: ret i64 [[O1]] +; LE-LABEL: @loadCombine_8consecutive_rev( +; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i64 [[L1]] +; +; BE-LABEL: @loadCombine_8consecutive_rev( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; BE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 +; BE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 +; BE-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 +; BE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 +; BE-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1 +; BE-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 +; BE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 +; BE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 +; BE-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64 +; BE-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64 +; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8 +; 
BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24 +; BE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32 +; BE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40 +; BE-NEXT: [[S7:%.*]] = shl i64 [[E7]], 48 +; BE-NEXT: [[S8:%.*]] = shl i64 [[E8]], 56 +; BE-NEXT: [[O7:%.*]] = or i64 [[S8]], [[S7]] +; BE-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]] +; BE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] +; BE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] +; BE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] +; BE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] +; BE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]] +; BE-NEXT: ret i64 [[O1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1348,45 +1356,49 @@ } define i64 @loadCombine_8consecutive_rev_BE(ptr %p) { -; ALL-LABEL: @loadCombine_8consecutive_rev_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 -; ALL-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 -; ALL-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 -; ALL-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 -; ALL-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 -; ALL-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1 -; ALL-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 -; ALL-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 -; ALL-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 -; ALL-NEXT: 
[[E7:%.*]] = zext i8 [[L7]] to i64 -; ALL-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64 -; ALL-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56 -; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48 -; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40 -; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32 -; ALL-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24 -; ALL-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16 -; ALL-NEXT: [[S7:%.*]] = shl i64 [[E7]], 8 -; ALL-NEXT: [[O7:%.*]] = or i64 [[E8]], [[S7]] -; ALL-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]] -; ALL-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] -; ALL-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] -; ALL-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] -; ALL-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] -; ALL-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]] -; ALL-NEXT: ret i64 [[O1]] +; LE-LABEL: @loadCombine_8consecutive_rev_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; LE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 +; LE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 +; LE-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 +; LE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 +; LE-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1 +; LE-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 +; LE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 +; LE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 +; LE-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64 +; 
LE-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64 +; LE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56 +; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48 +; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40 +; LE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32 +; LE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24 +; LE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16 +; LE-NEXT: [[S7:%.*]] = shl i64 [[E7]], 8 +; LE-NEXT: [[O7:%.*]] = or i64 [[E8]], [[S7]] +; LE-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]] +; LE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] +; LE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] +; LE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] +; LE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] +; LE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]] +; LE-NEXT: ret i64 [[O1]] +; +; BE-LABEL: @loadCombine_8consecutive_rev_BE( +; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 1 +; BE-NEXT: ret i64 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1432,45 +1444,49 @@ } define i64 @eggs(ptr noundef readonly %arg) { -; ALL-LABEL: @eggs( -; ALL-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARG:%.*]], align 1 -; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1 -; ALL-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 -; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 -; ALL-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 -; ALL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 3 -; ALL-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1 -; ALL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 4 -; ALL-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 -; ALL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 5 -; ALL-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1 -; ALL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 6 -; ALL-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1 -; ALL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 7 -; ALL-NEXT: [[TMP17:%.*]] = load i8, ptr 
[[TMP16]], align 1 -; ALL-NEXT: [[TMP18:%.*]] = zext i8 [[TMP17]] to i64 -; ALL-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 56 -; ALL-NEXT: [[TMP20:%.*]] = zext i8 [[TMP15]] to i64 -; ALL-NEXT: [[TMP21:%.*]] = shl nuw nsw i64 [[TMP20]], 48 -; ALL-NEXT: [[TMP22:%.*]] = or i64 [[TMP19]], [[TMP21]] -; ALL-NEXT: [[TMP23:%.*]] = zext i8 [[TMP13]] to i64 -; ALL-NEXT: [[TMP24:%.*]] = shl nuw nsw i64 [[TMP23]], 40 -; ALL-NEXT: [[TMP25:%.*]] = or i64 [[TMP22]], [[TMP24]] -; ALL-NEXT: [[TMP26:%.*]] = zext i8 [[TMP11]] to i64 -; ALL-NEXT: [[TMP27:%.*]] = shl nuw nsw i64 [[TMP26]], 32 -; ALL-NEXT: [[TMP28:%.*]] = or i64 [[TMP25]], [[TMP27]] -; ALL-NEXT: [[TMP29:%.*]] = zext i8 [[TMP9]] to i64 -; ALL-NEXT: [[TMP30:%.*]] = shl nuw nsw i64 [[TMP29]], 24 -; ALL-NEXT: [[TMP31:%.*]] = or i64 [[TMP28]], [[TMP30]] -; ALL-NEXT: [[TMP32:%.*]] = zext i8 [[TMP7]] to i64 -; ALL-NEXT: [[TMP33:%.*]] = shl nuw nsw i64 [[TMP32]], 16 -; ALL-NEXT: [[TMP34:%.*]] = zext i8 [[TMP5]] to i64 -; ALL-NEXT: [[TMP35:%.*]] = shl nuw nsw i64 [[TMP34]], 8 -; ALL-NEXT: [[TMP36:%.*]] = or i64 [[TMP31]], [[TMP33]] -; ALL-NEXT: [[TMP37:%.*]] = zext i8 [[TMP3]] to i64 -; ALL-NEXT: [[TMP38:%.*]] = or i64 [[TMP36]], [[TMP35]] -; ALL-NEXT: [[TMP39:%.*]] = or i64 [[TMP38]], [[TMP37]] -; ALL-NEXT: ret i64 [[TMP39]] +; LE-LABEL: @eggs( +; LE-NEXT: [[TMP3:%.*]] = load i64, ptr [[ARG:%.*]], align 1 +; LE-NEXT: ret i64 [[TMP3]] +; +; BE-LABEL: @eggs( +; BE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARG:%.*]], align 1 +; BE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1 +; BE-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +; BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 +; BE-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 +; BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 3 +; BE-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1 +; BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 4 +; BE-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], 
align 1 +; BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 5 +; BE-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1 +; BE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 6 +; BE-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1 +; BE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 7 +; BE-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1 +; BE-NEXT: [[TMP18:%.*]] = zext i8 [[TMP17]] to i64 +; BE-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 56 +; BE-NEXT: [[TMP20:%.*]] = zext i8 [[TMP15]] to i64 +; BE-NEXT: [[TMP21:%.*]] = shl nuw nsw i64 [[TMP20]], 48 +; BE-NEXT: [[TMP22:%.*]] = or i64 [[TMP19]], [[TMP21]] +; BE-NEXT: [[TMP23:%.*]] = zext i8 [[TMP13]] to i64 +; BE-NEXT: [[TMP24:%.*]] = shl nuw nsw i64 [[TMP23]], 40 +; BE-NEXT: [[TMP25:%.*]] = or i64 [[TMP22]], [[TMP24]] +; BE-NEXT: [[TMP26:%.*]] = zext i8 [[TMP11]] to i64 +; BE-NEXT: [[TMP27:%.*]] = shl nuw nsw i64 [[TMP26]], 32 +; BE-NEXT: [[TMP28:%.*]] = or i64 [[TMP25]], [[TMP27]] +; BE-NEXT: [[TMP29:%.*]] = zext i8 [[TMP9]] to i64 +; BE-NEXT: [[TMP30:%.*]] = shl nuw nsw i64 [[TMP29]], 24 +; BE-NEXT: [[TMP31:%.*]] = or i64 [[TMP28]], [[TMP30]] +; BE-NEXT: [[TMP32:%.*]] = zext i8 [[TMP7]] to i64 +; BE-NEXT: [[TMP33:%.*]] = shl nuw nsw i64 [[TMP32]], 16 +; BE-NEXT: [[TMP34:%.*]] = zext i8 [[TMP5]] to i64 +; BE-NEXT: [[TMP35:%.*]] = shl nuw nsw i64 [[TMP34]], 8 +; BE-NEXT: [[TMP36:%.*]] = or i64 [[TMP31]], [[TMP33]] +; BE-NEXT: [[TMP37:%.*]] = zext i8 [[TMP3]] to i64 +; BE-NEXT: [[TMP38:%.*]] = or i64 [[TMP36]], [[TMP35]] +; BE-NEXT: [[TMP39:%.*]] = or i64 [[TMP38]], [[TMP37]] +; BE-NEXT: ret i64 [[TMP39]] ; %tmp3 = load i8, ptr %arg, align 1 %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1 @@ -1683,25 +1699,29 @@ } define i32 @loadCombine_4consecutive_lower_index_comes_before(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_lower_index_comes_before( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = 
getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_lower_index_comes_before( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_lower_index_comes_before( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 diff 
--git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll --- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -1305,19 +1305,8 @@ define i32 @loadCombine_4consecutive_rev(ptr %p) { ; LE-LABEL: @loadCombine_4consecutive_rev( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 1 -; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L3]] to i32 -; LE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16 -; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S2]] -; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] -; LE-NEXT: ret i32 [[O3]] +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i32 [[L1]] ; ; BE-LABEL: @loadCombine_4consecutive_rev( ; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 @@ -1364,39 +1353,8 @@ define i64 @loadCombine_8consecutive_rev(ptr %p) { ; LE-LABEL: @loadCombine_8consecutive_rev( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; LE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 -; LE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 -; LE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 -; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; LE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 -; LE-NEXT: [[L6:%.*]] = load i8, ptr 
[[P5]], align 1 -; LE-NEXT: [[L7:%.*]] = load i16, ptr [[P6]], align 1 -; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L7]] to i64 -; LE-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 48 -; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 -; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 -; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 -; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 -; LE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 -; LE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 -; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8 -; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16 -; LE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24 -; LE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32 -; LE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40 -; LE-NEXT: [[O6:%.*]] = or i64 [[TMP2]], [[S6]] -; LE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] -; LE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] -; LE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] -; LE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] -; LE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]] -; LE-NEXT: ret i64 [[O1]] +; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i64 [[L1]] ; ; BE-LABEL: @loadCombine_8consecutive_rev( ; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 @@ -1523,39 +1481,8 @@ ; LE-NEXT: ret i64 [[O1]] ; ; BE-LABEL: @loadCombine_8consecutive_rev_BE( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 -; BE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 -; BE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 -; BE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 -; BE-NEXT: [[L7:%.*]] = load i16, ptr 
[[P6]], align 1 -; BE-NEXT: [[TMP1:%.*]] = zext i16 [[L7]] to i64 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 -; BE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 -; BE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 -; BE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56 -; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48 -; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40 -; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32 -; BE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24 -; BE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16 -; BE-NEXT: [[O6:%.*]] = or i64 [[TMP1]], [[S6]] -; BE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] -; BE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] -; BE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] -; BE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] -; BE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]] -; BE-NEXT: ret i64 [[O1]] +; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 1 +; BE-NEXT: ret i64 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1602,39 +1529,8 @@ define i64 @eggs(ptr noundef readonly %arg) { ; LE-LABEL: @eggs( -; LE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARG:%.*]], align 1 -; LE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1 -; LE-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 -; LE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 -; LE-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 -; LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 3 -; LE-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1 -; LE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 4 -; LE-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 -; LE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 5 -; LE-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1 -; LE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 6 -; LE-NEXT: [[TMP15:%.*]] = 
load i16, ptr [[TMP14]], align 1 -; LE-NEXT: [[TMP1:%.*]] = zext i16 [[TMP15]] to i64 -; LE-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 48 -; LE-NEXT: [[TMP23:%.*]] = zext i8 [[TMP13]] to i64 -; LE-NEXT: [[TMP24:%.*]] = shl nuw nsw i64 [[TMP23]], 40 -; LE-NEXT: [[TMP25:%.*]] = or i64 [[TMP2]], [[TMP24]] -; LE-NEXT: [[TMP26:%.*]] = zext i8 [[TMP11]] to i64 -; LE-NEXT: [[TMP27:%.*]] = shl nuw nsw i64 [[TMP26]], 32 -; LE-NEXT: [[TMP28:%.*]] = or i64 [[TMP25]], [[TMP27]] -; LE-NEXT: [[TMP29:%.*]] = zext i8 [[TMP9]] to i64 -; LE-NEXT: [[TMP30:%.*]] = shl nuw nsw i64 [[TMP29]], 24 -; LE-NEXT: [[TMP31:%.*]] = or i64 [[TMP28]], [[TMP30]] -; LE-NEXT: [[TMP32:%.*]] = zext i8 [[TMP7]] to i64 -; LE-NEXT: [[TMP33:%.*]] = shl nuw nsw i64 [[TMP32]], 16 -; LE-NEXT: [[TMP34:%.*]] = zext i8 [[TMP5]] to i64 -; LE-NEXT: [[TMP35:%.*]] = shl nuw nsw i64 [[TMP34]], 8 -; LE-NEXT: [[TMP36:%.*]] = or i64 [[TMP31]], [[TMP33]] -; LE-NEXT: [[TMP37:%.*]] = zext i8 [[TMP3]] to i64 -; LE-NEXT: [[TMP38:%.*]] = or i64 [[TMP36]], [[TMP35]] -; LE-NEXT: [[TMP39:%.*]] = or i64 [[TMP38]], [[TMP37]] -; LE-NEXT: ret i64 [[TMP39]] +; LE-NEXT: [[TMP3:%.*]] = load i64, ptr [[ARG:%.*]], align 1 +; LE-NEXT: ret i64 [[TMP3]] ; ; BE-LABEL: @eggs( ; BE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARG:%.*]], align 1 @@ -1917,25 +1813,29 @@ } define i32 @loadCombine_4consecutive_lower_index_comes_before(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_lower_index_comes_before( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; 
ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_lower_index_comes_before( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_lower_index_comes_before( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 diff --git a/llvm/test/Transforms/PhaseOrdering/loadcombine.ll b/llvm/test/Transforms/PhaseOrdering/loadcombine.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/loadcombine.ll @@ -0,0 +1,910 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O3 -S -mtriple=x86_64-- -mattr=+avx2 < %s | FileCheck %s + +define i32 @loadCombine_4consecutive_1234(ptr %p) { +; CHECK-LABEL: 
@loadCombine_4consecutive_1234( +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; CHECK-NEXT: ret i32 [[L1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_1243(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_1243( +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O2:%.*]] = or i32 [[S3]], [[TMP1]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s4 + %o3 = or i32 %o2, %s3 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_1324(ptr %p) { +; CHECK-LABEL: 
@loadCombine_4consecutive_1324( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s3 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_1342(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_1342( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr 
[[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s3 + %o2 = or i32 %o1, %s4 + %o3 = or i32 %o2, %s2 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_1423(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_1423( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: 
[[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s4 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %s3 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_1432(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_1432( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 
to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s4 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s2 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_2134(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_2134( +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; CHECK-NEXT: ret i32 [[L1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s2, %e1 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_2143(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_2143( +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O2:%.*]] = or i32 [[S3]], [[TMP1]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = 
shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s2, %e1 + %o2 = or i32 %o1, %s4 + %o3 = or i32 %o2, %s3 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_2314(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_2314( +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; CHECK-NEXT: ret i32 [[L1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s2, %s3 + %o2 = or i32 %o1, %e1 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_2341(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_2341( +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; CHECK-NEXT: ret i32 [[L1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s2, %s3 + %o2 = or i32 %o1, %s4 + %o3 = or i32 %o2, %e1 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_2413(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_2413( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 
+; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s2, %s4 + %o2 = or i32 %o1, %e1 + %o3 = or i32 %o2, %s3 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_2431(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_2431( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: 
[[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s2, %s4 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %e1 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_3124(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_3124( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to 
i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s3, %e1 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_3142(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_3142( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s3, %e1 + %o2 = or i32 %o1, %s4 + %o3 = or i32 %o2, %s2 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_3214(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_3214( +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; CHECK-NEXT: ret i32 [[L1]] +; + %p1 = getelementptr 
i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s3, %s2 + %o2 = or i32 %o1, %e1 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_3241(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_3241( +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; CHECK-NEXT: ret i32 [[L1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s3, %s2 + %o2 = or i32 %o1, %s4 + %o3 = or i32 %o2, %e1 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_3412(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_3412( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[L3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 16 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[O2:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[TMP2]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 
1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s3, %s4 + %o2 = or i32 %o1, %e1 + %o3 = or i32 %o2, %s2 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_3421(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_3421( +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; CHECK-NEXT: ret i32 [[L1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s3, %s4 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %e1 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_4123(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_4123( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: 
[[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s4, %e1 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %s3 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_4132(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_4132( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + 
+ %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s4, %e1 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s2 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_4213(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_4213( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s4, %s2 + %o2 = or i32 %o1, %e1 + %o3 = or i32 %o2, %s3 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_4231(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_4231( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 
1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; CHECK-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s4, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %e1 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_4312(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_4312( +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; CHECK-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[L3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 16 +; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to 
i32 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; CHECK-NEXT: [[O2:%.*]] = or i32 [[S2]], [[E1]] +; CHECK-NEXT: [[O3:%.*]] = or i32 [[O2]], [[TMP2]] +; CHECK-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s4, %s3 + %o2 = or i32 %o1, %e1 + %o3 = or i32 %o2, %s2 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_4321(ptr %p) { +; CHECK-LABEL: @loadCombine_4consecutive_4321( +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; CHECK-NEXT: ret i32 [[L1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s4, %s3 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %e1 + ret i32 %o3 +}