Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
===================================================================
--- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -50,6 +50,10 @@
           "Number of guarded funnel shifts transformed into funnel shifts");
 STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
 
+static cl::opt<unsigned> MaxInstrsToScan(
+    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
+    cl::desc("Max number of instructions to scan for aggressive instcombine."));
+
 namespace {
 /// Contains expression pattern combiner logic.
 /// This class provides both the logic to combine expression patterns and
@@ -635,18 +639,214 @@
   return true;
 }
 
+/// This is used by foldLoadsRecursive() to capture the root load node, which
+/// is of the form or(load, load), and to recursively build the wider load. It
+/// also captures the shift amount, the zero-extend type and the load size.
+struct LoadOps {
+  LoadInst *Root = nullptr;
+  bool FoundRoot = false;
+  uint64_t LoadSize = 0;
+  Value *Shift = nullptr;
+  Type *ZextType;
+  AAMDNodes AATags;
+};
+
+// Identify and merge consecutive loads recursively. The patterns handled are
+// of the form:
+//   (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
+//   (ZExt(L1) << shift1) | ZExt(L2)             -> ZExt(L3)
+static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
+                               AliasAnalysis &AA) {
+  Value *ShAmt2 = nullptr;
+  Value *X;
+  Instruction *L1, *L2;
+
+  // Go to the last node with loads.
+  if (match(V, m_OneUse(m_c_Or(
+                   m_Value(X),
+                   m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
+                                  m_Value(ShAmt2)))))) ||
+      match(V, m_OneUse(m_Or(m_Value(X),
+                             m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2))))))))
+    foldLoadsRecursive(X, LOps, DL, AA);
+  else
+    return false;
+
+  // Check if the pattern has loads.
+  LoadInst *LI1 = LOps.Root;
+  Value *ShAmt1 = LOps.Shift;
+  if (LOps.FoundRoot == false &&
+      (match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) ||
+       match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
+                               m_Value(ShAmt1)))))) {
+    LI1 = dyn_cast<LoadInst>(L1);
+  }
+  LoadInst *LI2 = dyn_cast<LoadInst>(L2);
+
+  // Bail out if the two loads are the same instruction, are not simple
+  // (atomic or volatile), or live in different address spaces.
+  if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
+      LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
+    return false;
+
+  // Check that both loads come from the same basic block.
+  if (LI1->getParent() != LI2->getParent())
+    return false;
+
+  // Swap the loads if LI1 comes later, as we only handle forward loads here.
+  // This is done because InstCombine folds the lowest node of forward loads
+  // into reverse order. The implementation will subsequently be extended to
+  // handle all reverse loads.
+  if (!LI1->comesBefore(LI2)) {
+    if (LOps.FoundRoot == false) {
+      std::swap(LI1, LI2);
+      std::swap(ShAmt1, ShAmt2);
+    } else
+      return false;
+  }
+
+  // Query the data layout for endianness.
+  bool IsBigEndian = DL.isBigEndian();
+
+  // Check that the loads are consecutive and have the same size.
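+  // For example, for the test pointer %p3 = getelementptr i8, ptr %p, i32 3,
+  // stripAndAccumulateConstantOffsets() below returns the base %p with a
+  // constant offset of 3; two loads are treated as consecutive when their
+  // accumulated offsets differ by exactly the store size of the first
+  // (possibly already merged) load.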
+  Value *Load1Ptr = LI1->getPointerOperand();
+  APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
+  Load1Ptr =
+      Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
+                                                  /* AllowNonInbounds */ true);
+
+  Value *Load2Ptr = LI2->getPointerOperand();
+  APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
+  Load2Ptr =
+      Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
+                                                  /* AllowNonInbounds */ true);
+
+  // Verify that both loads have the same base pointer and the same load size.
+  uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
+  uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
+  if (Load1Ptr != Load2Ptr || LoadSize1 != LoadSize2)
+    return false;
+
+  // Only support load sizes that are at least 8 bits and a power of 2.
+  if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1))
+    return false;
+
+  // Use alias analysis to check for stores between the loads.
+  MemoryLocation Loc = MemoryLocation::get(LI2);
+  unsigned NumScanned = 0;
+  for (Instruction &Inst : make_range(LI1->getIterator(), LI2->getIterator())) {
+    if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
+      return false;
+    if (++NumScanned > MaxInstrsToScan)
+      return false;
+  }
+
+  // On big-endian targets, swap the shift amounts.
+  if (IsBigEndian)
+    std::swap(ShAmt1, ShAmt2);
+
+  // Extract the constant shift amounts.
+  const APInt *Temp;
+  uint64_t Shift1 = 0, Shift2 = 0;
+  if (ShAmt1 && match(ShAmt1, m_APInt(Temp)))
+    Shift1 = Temp->getZExtValue();
+  if (ShAmt2 && match(ShAmt2, m_APInt(Temp)))
+    Shift2 = Temp->getZExtValue();
+
+  // The first load is always LI1. This is where we put the new load.
+  // Use the merged load size available from LI1 if we already combined loads.
+  if (LOps.FoundRoot)
+    LoadSize1 = LOps.LoadSize;
+
+  // Verify that the shift difference matches the load size and that the loads
+  // are consecutive in memory.
+  uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
+  uint64_t PrevSize =
+      DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
+  if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
+    return false;
+
+  // Update LOps.
+  AAMDNodes AATags1 = LOps.AATags;
+  AAMDNodes AATags2 = LI2->getAAMetadata();
+  if (LOps.FoundRoot == false) {
+    LOps.FoundRoot = true;
+    LOps.LoadSize = LoadSize1 + LoadSize2;
+    AATags1 = LI1->getAAMetadata();
+  } else
+    LOps.LoadSize = LOps.LoadSize + LoadSize2;
+
+  // Concatenate the AATags of the merged loads.
+  LOps.AATags = AATags1.concat(AATags2);
+
+  LOps.Root = LI1;
+  LOps.Shift = ShAmt1;
+  LOps.ZextType = X->getType();
+  return true;
+}
+
+// For a given basic-block instruction, look at all the loads in the chain
+// that form a pattern suggesting the loads can be combined; the one and only
+// use of those loads is to form a wider load.
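+// For example, on a little-endian target (and provided the TTI checks below
+// allow the wider access), the chain exercised by the loadCombine_2consecutive
+// test:
+//   %l1 = load i8, ptr %p
+//   %l2 = load i8, ptr %p1        ; %p1 = getelementptr i8, ptr %p, i32 1
+//   %e1 = zext i8 %l1 to i16
+//   %e2 = zext i8 %l2 to i16
+//   %s2 = shl i16 %e2, 8
+//   %o1 = or i16 %e1, %s2
+// collapses into a single "load i16, ptr %p, align 1".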
+static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
+                                 TargetTransformInfo &TTI, AliasAnalysis &AA) {
+  LoadOps LOps;
+  if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
+    return false;
+
+  IRBuilder<> Builder(&I);
+  LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;
+
+  // TTI-based checks to decide whether generating the wider load is
+  // worthwhile.
+  bool Allowed =
+      TTI.isTypeLegal(IntegerType::get(I.getContext(), LOps.LoadSize));
+  if (!Allowed)
+    return false;
+
+  unsigned AS = LI1->getPointerAddressSpace();
+  bool Fast = false;
+  Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
+                                               AS, LI1->getAlign(), &Fast);
+  if (!Allowed || !Fast)
+    return false;
+
+  // Generate the new, wider load.
+  Value *Load1Ptr = LI1->getPointerOperand();
+  Builder.SetInsertPoint(LI1);
+  NewLoad = Builder.CreateAlignedLoad(
+      IntegerType::get(Load1Ptr->getContext(), LOps.LoadSize), Load1Ptr,
+      LI1->getAlign(), LI1->isVolatile(), "");
+  NewLoad->takeName(LI1);
+  // Set the AATags metadata on the new load.
+  if (LOps.AATags)
+    NewLoad->setAAMetadata(LOps.AATags);
+
+  Value *NewOp = NewLoad;
+  // Zero-extend if needed.
+  if (LOps.ZextType)
+    NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);
+
+  // Shift by LI1's shift amount, if it is non-zero.
+  if (LOps.Shift)
+    NewOp = Builder.CreateShl(NewOp, LOps.Shift);
+  I.replaceAllUsesWith(NewOp);
+
+  return true;
+}
+
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
 static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
                                 TargetTransformInfo &TTI,
-                                TargetLibraryInfo &TLI) {
+                                TargetLibraryInfo &TLI, AliasAnalysis &AA) {
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
     if (!DT.isReachableFromEntry(&BB))
       continue;
 
+    const DataLayout &DL = F.getParent()->getDataLayout();
+
     // Walk the block backwards for efficiency. We're matching a chain of
     // use->defs, so we're more likely to succeed by starting from the bottom.
     // Also, we want to avoid matching partial patterns.
@@ -658,6 +858,7 @@
       MadeChange |= tryToRecognizePopCount(I);
       MadeChange |= tryToFPToSat(I, TTI);
       MadeChange |= tryToRecognizeTableBasedCttz(I);
+      MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA);
       // NOTE: This function introduces erasing of the instruction `I`, so it
       // needs to be called at the end of this sequence, otherwise we may make
       // bugs.
@@ -676,12 +877,13 @@
 /// This is the entry point for all transforms. Pass manager differences are
 /// handled in the callers of this function.
 static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
-                    TargetLibraryInfo &TLI, DominatorTree &DT) {
+                    TargetLibraryInfo &TLI, DominatorTree &DT,
+                    AliasAnalysis &AA) {
   bool MadeChange = false;
   const DataLayout &DL = F.getParent()->getDataLayout();
   TruncInstCombine TIC(AC, TLI, DL, DT);
   MadeChange |= TIC.run(F);
-  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI);
+  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA);
   return MadeChange;
 }
 
@@ -696,6 +898,7 @@
     AU.addPreserved();
     AU.addPreserved();
    AU.addPreserved();
+    AU.addRequired<AAResultsWrapperPass>();
   }
 
 bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
@@ -703,7 +906,8 @@
   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  return runImpl(F, AC, TTI, TLI, DT);
+  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+  return runImpl(F, AC, TTI, TLI, DT, AA);
 }
 
 PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
@@ -712,7 +916,8 @@
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
-  if (!runImpl(F, AC, TTI, TLI, DT)) {
+  auto &AA = AM.getResult<AAManager>(F);
+  if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
   }
Index: llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
===================================================================
--- llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="e-n64" | FileCheck %s --check-prefixes=ALL
-; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="E-n64" | FileCheck %s --check-prefixes=ALL
+; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="e-n64" | FileCheck %s --check-prefixes=ALL,LE
+; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="E-n64" | FileCheck %s --check-prefixes=ALL,BE
 
 define i16 @loadCombine_2consecutive(ptr %p) {
 ;
@@ -46,25 +46,29 @@
 }
 
 define i32 @loadCombine_4consecutive(ptr %p) {
-; ALL-LABEL: @loadCombine_4consecutive(
-; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
-; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
-; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
-; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]]
-; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]]
-; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]]
-; ALL-NEXT: ret i32 [[O3]]
+; LE-LABEL: @loadCombine_4consecutive(
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT: ret i32 [[L1]]
+;
+; BE-LABEL: @loadCombine_4consecutive(
+; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:
[[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -90,25 +94,29 @@ } define i32 @loadCombine_4consecutive_BE(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_BE( +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: ret i32 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -134,26 +142,31 @@ } define i32 @loadCombine_4consecutive_alias(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; 
ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_alias( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -180,26 +193,31 @@ } define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl 
i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias_BE( +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: ret i32 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -711,25 +729,40 @@ } define i128 @loadCombine_i128(ptr %p) { -; ALL-LABEL: @loadCombine_i128( -; ALL-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4 -; ALL-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4 -; ALL-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 -; ALL-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 -; ALL-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128 -; ALL-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128 -; ALL-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 -; ALL-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 -; ALL-NEXT: [[S2:%.*]] = shl i128 [[E2]], 32 -; ALL-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64 -; ALL-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96 -; ALL-NEXT: [[O1:%.*]] = or i128 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]] -; ALL-NEXT: ret i128 [[O3]] +; LE-LABEL: @loadCombine_i128( +; LE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4 +; LE-NEXT: [[TMP1:%.*]] = zext i64 [[L1]] to i128 +; LE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 +; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 +; LE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 +; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 +; LE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64 +; LE-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96 +; LE-NEXT: [[O2:%.*]] = or i128 [[TMP1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]] +; LE-NEXT: ret i128 [[O3]] +; +; BE-LABEL: @loadCombine_i128( +; BE-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4 +; BE-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4 +; BE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 +; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 +; BE-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128 +; BE-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128 +; BE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 +; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 +; BE-NEXT: [[S2:%.*]] = shl i128 [[E2]], 32 +; BE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64 +; BE-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96 +; BE-NEXT: [[O1:%.*]] = or i128 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]] +; BE-NEXT: ret i128 [[O3]] ; %p1 = getelementptr i32, ptr %p, i32 1 %p2 = getelementptr i32, ptr %p, i32 2 @@ -755,25 +788,40 @@ } define i128 @loadCombine_i128_BE(ptr %p) { -; ALL-LABEL: @loadCombine_i128_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4 -; ALL-NEXT: [[L2:%.*]] = load 
i32, ptr [[P1]], align 4 -; ALL-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 -; ALL-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 -; ALL-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128 -; ALL-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128 -; ALL-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 -; ALL-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 -; ALL-NEXT: [[S1:%.*]] = shl i128 [[E1]], 96 -; ALL-NEXT: [[S2:%.*]] = shl i128 [[E2]], 64 -; ALL-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32 -; ALL-NEXT: [[O1:%.*]] = or i128 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]] -; ALL-NEXT: ret i128 [[O3]] +; LE-LABEL: @loadCombine_i128_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4 +; LE-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4 +; LE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 +; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 +; LE-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128 +; LE-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128 +; LE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 +; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 +; LE-NEXT: [[S1:%.*]] = shl i128 [[E1]], 96 +; LE-NEXT: [[S2:%.*]] = shl i128 [[E2]], 64 +; LE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32 +; LE-NEXT: [[O1:%.*]] = or i128 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]] +; LE-NEXT: ret i128 [[O3]] +; +; BE-LABEL: @loadCombine_i128_BE( +; BE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4 +; BE-NEXT: [[TMP1:%.*]] = zext i64 [[L1]] to i128 +; BE-NEXT: [[TMP2:%.*]] = shl i128 [[TMP1]], 64 +; BE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 +; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 +; BE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 +; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 +; BE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32 +; BE-NEXT: [[O2:%.*]] = or i128 [[TMP2]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]] +; BE-NEXT: ret i128 [[O3]] ; %p1 = getelementptr i32, ptr %p, i32 1 %p2 = getelementptr i32, ptr %p, i32 2 @@ -799,25 +847,29 @@ } define i64 @loadCombine_i64(ptr %p) { -; ALL-LABEL: @loadCombine_i64( -; ALL-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 -; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2 -; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2 -; ALL-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2 -; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64 -; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64 -; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64 -; ALL-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64 -; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 32 -; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 48 -; ALL-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]] -; ALL-NEXT: ret i64 [[O3]] +; LE-LABEL: @loadCombine_i64( +; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2 +; LE-NEXT: ret i64 [[L1]] +; +; BE-LABEL: @loadCombine_i64( +; BE-NEXT: [[P1:%.*]] 
= getelementptr i16, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 +; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2 +; BE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2 +; BE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2 +; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64 +; BE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64 +; BE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64 +; BE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64 +; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 16 +; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 32 +; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 48 +; BE-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]] +; BE-NEXT: ret i64 [[O3]] ; %p1 = getelementptr i16, ptr %p, i32 1 %p2 = getelementptr i16, ptr %p, i32 2 @@ -843,25 +895,29 @@ } define i64 @loadCombine_i64_BE(ptr %p) { -; ALL-LABEL: @loadCombine_i64_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 -; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2 -; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2 -; ALL-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2 -; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64 -; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64 -; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64 -; ALL-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64 -; ALL-NEXT: [[S1:%.*]] = shl i64 [[E1]], 48 -; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 32 -; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16 -; ALL-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i64 [[O2]], [[E4]] -; ALL-NEXT: ret i64 [[O3]] +; LE-LABEL: @loadCombine_i64_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 +; LE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2 +; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2 +; LE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2 +; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64 +; LE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64 +; LE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64 +; LE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64 +; LE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 48 +; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 32 +; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16 +; LE-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[E4]] +; LE-NEXT: ret i64 [[O3]] +; +; BE-LABEL: @loadCombine_i64_BE( +; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2 +; BE-NEXT: ret i64 [[L1]] ; %p1 = getelementptr i16, ptr %p, i32 1 %p2 = getelementptr i16, ptr %p, i32 2 @@ -975,46 +1031,51 @@ } define i64 @load64_farLoads(ptr %ptr) { -; ALL-LABEL: @load64_farLoads( -; ALL-NEXT: entry: -; ALL-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1 -; ALL-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64 -; ALL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 -; ALL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; ALL-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i64 -; ALL-NEXT: 
[[SHL:%.*]] = shl i64 [[CONV2]], 8 -; ALL-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[SHL]] -; ALL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2 -; ALL-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 -; ALL-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i64 -; ALL-NEXT: [[SHL5:%.*]] = shl i64 [[CONV4]], 16 -; ALL-NEXT: [[OR6:%.*]] = or i64 [[OR]], [[SHL5]] -; ALL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 -; ALL-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 -; ALL-NEXT: [[CONV8:%.*]] = zext i8 [[TMP3]] to i64 -; ALL-NEXT: [[SHL9:%.*]] = shl i64 [[CONV8]], 24 -; ALL-NEXT: [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]] -; ALL-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4 -; ALL-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1 -; ALL-NEXT: [[CONV12:%.*]] = zext i8 [[TMP4]] to i64 -; ALL-NEXT: [[SHL13:%.*]] = shl i64 [[CONV12]], 32 -; ALL-NEXT: [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]] -; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5 -; ALL-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1 -; ALL-NEXT: [[CONV16:%.*]] = zext i8 [[TMP5]] to i64 -; ALL-NEXT: [[SHL17:%.*]] = shl i64 [[CONV16]], 40 -; ALL-NEXT: [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]] -; ALL-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6 -; ALL-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1 -; ALL-NEXT: [[CONV20:%.*]] = zext i8 [[TMP6]] to i64 -; ALL-NEXT: [[SHL21:%.*]] = shl i64 [[CONV20]], 48 -; ALL-NEXT: [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]] -; ALL-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7 -; ALL-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1 -; ALL-NEXT: [[CONV24:%.*]] = zext i8 [[TMP7]] to i64 -; ALL-NEXT: [[SHL25:%.*]] = shl i64 [[CONV24]], 56 -; ALL-NEXT: [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]] -; ALL-NEXT: ret i64 [[OR26]] +; LE-LABEL: @load64_farLoads( +; LE-NEXT: entry: +; LE-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1 +; LE-NEXT: ret i64 [[TMP0]] +; +; BE-LABEL: @load64_farLoads( +; BE-NEXT: entry: +; BE-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1 +; BE-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64 +; BE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 +; BE-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; BE-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i64 +; BE-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 8 +; BE-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[SHL]] +; BE-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2 +; BE-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 +; BE-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i64 +; BE-NEXT: [[SHL5:%.*]] = shl i64 [[CONV4]], 16 +; BE-NEXT: [[OR6:%.*]] = or i64 [[OR]], [[SHL5]] +; BE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 +; BE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 +; BE-NEXT: [[CONV8:%.*]] = zext i8 [[TMP3]] to i64 +; BE-NEXT: [[SHL9:%.*]] = shl i64 [[CONV8]], 24 +; BE-NEXT: [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]] +; BE-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4 +; BE-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1 +; BE-NEXT: [[CONV12:%.*]] = zext i8 [[TMP4]] to i64 +; BE-NEXT: [[SHL13:%.*]] = shl i64 [[CONV12]], 32 +; BE-NEXT: [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]] +; BE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5 +; BE-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], 
align 1 +; BE-NEXT: [[CONV16:%.*]] = zext i8 [[TMP5]] to i64 +; BE-NEXT: [[SHL17:%.*]] = shl i64 [[CONV16]], 40 +; BE-NEXT: [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]] +; BE-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6 +; BE-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1 +; BE-NEXT: [[CONV20:%.*]] = zext i8 [[TMP6]] to i64 +; BE-NEXT: [[SHL21:%.*]] = shl i64 [[CONV20]], 48 +; BE-NEXT: [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]] +; BE-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7 +; BE-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1 +; BE-NEXT: [[CONV24:%.*]] = zext i8 [[TMP7]] to i64 +; BE-NEXT: [[SHL25:%.*]] = shl i64 [[CONV24]], 56 +; BE-NEXT: [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]] +; BE-NEXT: ret i64 [[OR26]] ; entry: %0 = load i8, ptr %ptr, align 1 @@ -1058,26 +1119,31 @@ } define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) { -; ALL-LABEL: @loadCombine_4consecutive_metadata( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0 -; ALL-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_metadata( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0 +; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_metadata( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0 +; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1151,3 +1217,215 @@ %o3 = or i16 %o2, %s4 ret i16 %o3 } + +define i32 @loadCombine_4consecutive_rev(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_rev( +; 
ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s4, %s3 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %e1 + ret i32 %o3 +} + +define i64 @loadCombine_8consecutive_rev(ptr %p) { +; ALL-LABEL: @loadCombine_8consecutive_rev( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; ALL-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 +; ALL-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 +; ALL-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 +; ALL-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 +; ALL-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1 +; ALL-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 +; ALL-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 +; ALL-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 +; ALL-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64 +; ALL-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64 +; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24 +; ALL-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32 +; ALL-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40 +; ALL-NEXT: [[S7:%.*]] = shl i64 [[E7]], 48 +; ALL-NEXT: [[S8:%.*]] = shl i64 [[E8]], 56 +; ALL-NEXT: [[O7:%.*]] = or i64 [[S8]], [[S7]] +; ALL-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]] +; ALL-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] +; ALL-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] +; ALL-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] +; ALL-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] +; ALL-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]] +; ALL-NEXT: ret i64 [[O1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + 
%p4 = getelementptr i8, ptr %p, i32 4 + %p5 = getelementptr i8, ptr %p, i32 5 + %p6 = getelementptr i8, ptr %p, i32 6 + %p7 = getelementptr i8, ptr %p, i32 7 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + %l5 = load i8, ptr %p4 + %l6 = load i8, ptr %p5 + %l7 = load i8, ptr %p6 + %l8 = load i8, ptr %p7 + + %e1 = zext i8 %l1 to i64 + %e2 = zext i8 %l2 to i64 + %e3 = zext i8 %l3 to i64 + %e4 = zext i8 %l4 to i64 + %e5 = zext i8 %l5 to i64 + %e6 = zext i8 %l6 to i64 + %e7 = zext i8 %l7 to i64 + %e8 = zext i8 %l8 to i64 + + %s2 = shl i64 %e2, 8 + %s3 = shl i64 %e3, 16 + %s4 = shl i64 %e4, 24 + %s5 = shl i64 %e5, 32 + %s6 = shl i64 %e6, 40 + %s7 = shl i64 %e7, 48 + %s8 = shl i64 %e8, 56 + + %o7 = or i64 %s8, %s7 + %o6 = or i64 %o7, %s6 + %o5 = or i64 %o6, %s5 + %o4 = or i64 %o5, %s4 + %o3 = or i64 %o4, %s3 + %o2 = or i64 %o3, %s2 + %o1 = or i64 %o2, %e1 + ret i64 %o1 +} + +define i64 @loadCombine_8consecutive_rev_BE(ptr %p) { +; ALL-LABEL: @loadCombine_8consecutive_rev_BE( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; ALL-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 +; ALL-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 +; ALL-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 +; ALL-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 +; ALL-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1 +; ALL-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 +; ALL-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 +; ALL-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 +; ALL-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64 +; ALL-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64 +; ALL-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56 +; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48 +; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40 +; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32 +; ALL-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24 +; ALL-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16 +; ALL-NEXT: [[S7:%.*]] = shl i64 [[E7]], 8 +; ALL-NEXT: [[O7:%.*]] = or i64 [[E8]], [[S7]] +; ALL-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]] +; ALL-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] +; ALL-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] +; ALL-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] +; ALL-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] +; ALL-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]] +; ALL-NEXT: ret i64 [[O1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %p4 = getelementptr i8, ptr %p, i32 4 + %p5 = getelementptr i8, ptr %p, i32 5 + %p6 = getelementptr i8, ptr %p, i32 6 + %p7 = getelementptr i8, ptr %p, i32 7 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + %l5 = load i8, ptr %p4 + %l6 = load i8, ptr %p5 + %l7 = load i8, ptr %p6 + %l8 = load i8, ptr %p7 + + %e1 = zext i8 %l1 to i64 + %e2 = zext i8 %l2 to i64 + %e3 = zext i8 %l3 to i64 + %e4 = zext i8 %l4 
to i64 + %e5 = zext i8 %l5 to i64 + %e6 = zext i8 %l6 to i64 + %e7 = zext i8 %l7 to i64 + %e8 = zext i8 %l8 to i64 + + %s1 = shl i64 %e1, 56 + %s2 = shl i64 %e2, 48 + %s3 = shl i64 %e3, 40 + %s4 = shl i64 %e4, 32 + %s5 = shl i64 %e5, 24 + %s6 = shl i64 %e6, 16 + %s7 = shl i64 %e7, 8 + + %o7 = or i64 %e8, %s7 + %o6 = or i64 %o7, %s6 + %o5 = or i64 %o6, %s5 + %o4 = or i64 %o5, %s4 + %o3 = or i64 %o4, %s3 + %o2 = or i64 %o3, %s2 + %o1 = or i64 %o2, %s1 + ret i64 %o1 +} Index: llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll =================================================================== --- llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -1,18 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="e-n64" -S | FileCheck %s --check-prefixes=ALL -; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="E-n64" -S | FileCheck %s --check-prefixes=ALL +; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="e-n64" -S | FileCheck %s --check-prefixes=ALL,LE +; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="E-n64" -S | FileCheck %s --check-prefixes=ALL,BE define i16 @loadCombine_2consecutive(ptr %p) { ; -; ALL-LABEL: @loadCombine_2consecutive( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 -; ALL-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 -; ALL-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] -; ALL-NEXT: ret i16 [[O1]] +; LE-LABEL: @loadCombine_2consecutive( +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i16 [[L1]] +; +; BE-LABEL: @loadCombine_2consecutive( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; BE-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 +; BE-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] +; BE-NEXT: ret i16 [[O1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %l1 = load i8, ptr %p @@ -25,15 +29,19 @@ } define i16 @loadCombine_2consecutive_BE(ptr %p) { -; ALL-LABEL: @loadCombine_2consecutive_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 -; ALL-NEXT: [[S1:%.*]] = shl i16 [[E1]], 8 -; ALL-NEXT: [[O1:%.*]] = or i16 [[S1]], [[E2]] -; ALL-NEXT: ret i16 [[O1]] +; LE-LABEL: @loadCombine_2consecutive_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; LE-NEXT: [[S1:%.*]] = shl i16 [[E1]], 8 +; LE-NEXT: [[O1:%.*]] = or i16 [[S1]], [[E2]] +; LE-NEXT: ret i16 [[O1]] +; +; BE-LABEL: @loadCombine_2consecutive_BE( +; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P:%.*]], align 1 +; BE-NEXT: ret 
i16 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %l1 = load i8, ptr %p @@ -46,25 +54,29 @@ } define i32 @loadCombine_4consecutive(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -90,25 +102,29 @@ } define i32 @loadCombine_4consecutive_BE(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, 
ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_BE( +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: ret i32 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -134,26 +150,31 @@ } define i32 @loadCombine_4consecutive_alias(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_alias( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -180,26 +201,31 @@ } define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; 
ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias_BE( +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: ret i32 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -226,26 +252,42 @@ } define i32 @loadCombine_4consecutive_alias2(ptr %p, ptr %pstr) { -; ALL-LABEL: @loadCombine_4consecutive_alias2( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias2( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S3:%.*]] = shl i32 
[[E3]], 16 +; LE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias2( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -272,26 +314,42 @@ } define i32 @loadCombine_4consecutive_alias2_BE(ptr %p, ptr %pstr) { -; ALL-LABEL: @loadCombine_4consecutive_alias2_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias2_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias2_BE( +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], 
align 1 +; BE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; BE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; BE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -318,27 +376,44 @@ } define i32 @loadCombine_4consecutive_alias3(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias3( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: store i8 10, ptr [[P3]], align 1 -; ALL-NEXT: store i8 5, ptr [[P]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias3( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: store i8 10, ptr [[P3]], align 1 +; LE-NEXT: store i8 5, ptr [[P]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; LE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias3( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 10, ptr [[P3]], align 1 +; BE-NEXT: store i8 5, ptr [[P]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr 
%p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -366,27 +441,44 @@ } define i32 @loadCombine_4consecutive_alias3_BE(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias3_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: store i8 10, ptr [[P3]], align 1 -; ALL-NEXT: store i8 5, ptr [[P]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias3_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: store i8 10, ptr [[P3]], align 1 +; LE-NEXT: store i8 5, ptr [[P]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias3_BE( +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; BE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; BE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 10, ptr [[P3]], align 1 +; BE-NEXT: store i8 5, ptr [[P]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; BE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -677,20 +769,30 @@ } define i32 @loadCombine_parLoad1(ptr %p) { -; ALL-LABEL: @loadCombine_parLoad1( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; 
ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: ret i32 [[O2]] +; LE-LABEL: @loadCombine_parLoad1( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S3]] +; LE-NEXT: ret i32 [[O2]] +; +; BE-LABEL: @loadCombine_parLoad1( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: ret i32 [[O2]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -711,25 +813,40 @@ } define i128 @loadCombine_i128(ptr %p) { -; ALL-LABEL: @loadCombine_i128( -; ALL-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4 -; ALL-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4 -; ALL-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 -; ALL-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 -; ALL-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128 -; ALL-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128 -; ALL-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 -; ALL-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 -; ALL-NEXT: [[S2:%.*]] = shl i128 [[E2]], 32 -; ALL-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64 -; ALL-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96 -; ALL-NEXT: [[O1:%.*]] = or i128 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]] -; ALL-NEXT: ret i128 [[O3]] +; LE-LABEL: @loadCombine_i128( +; LE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4 +; LE-NEXT: [[TMP1:%.*]] = zext i64 [[L1]] to i128 +; LE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 +; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 +; LE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 +; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 +; LE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64 +; LE-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96 +; LE-NEXT: [[O2:%.*]] = or i128 [[TMP1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]] +; LE-NEXT: ret i128 [[O3]] +; +; BE-LABEL: @loadCombine_i128( +; BE-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4 +; BE-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4 +; BE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 +; BE-NEXT: [[L4:%.*]] 
= load i32, ptr [[P3]], align 4 +; BE-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128 +; BE-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128 +; BE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 +; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 +; BE-NEXT: [[S2:%.*]] = shl i128 [[E2]], 32 +; BE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 64 +; BE-NEXT: [[S4:%.*]] = shl i128 [[E4]], 96 +; BE-NEXT: [[O1:%.*]] = or i128 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[S4]] +; BE-NEXT: ret i128 [[O3]] ; %p1 = getelementptr i32, ptr %p, i32 1 %p2 = getelementptr i32, ptr %p, i32 2 @@ -755,25 +872,40 @@ } define i128 @loadCombine_i128_BE(ptr %p) { -; ALL-LABEL: @loadCombine_i128_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4 -; ALL-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4 -; ALL-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 -; ALL-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 -; ALL-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128 -; ALL-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128 -; ALL-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 -; ALL-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 -; ALL-NEXT: [[S1:%.*]] = shl i128 [[E1]], 96 -; ALL-NEXT: [[S2:%.*]] = shl i128 [[E2]], 64 -; ALL-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32 -; ALL-NEXT: [[O1:%.*]] = or i128 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]] -; ALL-NEXT: ret i128 [[O3]] +; LE-LABEL: @loadCombine_i128_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 4 +; LE-NEXT: [[L2:%.*]] = load i32, ptr [[P1]], align 4 +; LE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 +; LE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 +; LE-NEXT: [[E1:%.*]] = zext i32 [[L1]] to i128 +; LE-NEXT: [[E2:%.*]] = zext i32 [[L2]] to i128 +; LE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 +; LE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 +; LE-NEXT: [[S1:%.*]] = shl i128 [[E1]], 96 +; LE-NEXT: [[S2:%.*]] = shl i128 [[E2]], 64 +; LE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32 +; LE-NEXT: [[O1:%.*]] = or i128 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i128 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]] +; LE-NEXT: ret i128 [[O3]] +; +; BE-LABEL: @loadCombine_i128_BE( +; BE-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P]], align 4 +; BE-NEXT: [[TMP1:%.*]] = zext i64 [[L1]] to i128 +; BE-NEXT: [[TMP2:%.*]] = shl i128 [[TMP1]], 64 +; BE-NEXT: [[L3:%.*]] = load i32, ptr [[P2]], align 4 +; BE-NEXT: [[L4:%.*]] = load i32, ptr [[P3]], align 4 +; BE-NEXT: [[E3:%.*]] = zext i32 [[L3]] to i128 +; BE-NEXT: [[E4:%.*]] = zext i32 [[L4]] to i128 +; BE-NEXT: [[S3:%.*]] = shl i128 [[E3]], 32 +; BE-NEXT: [[O2:%.*]] = or i128 [[TMP2]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i128 [[O2]], [[E4]] +; BE-NEXT: ret i128 [[O3]] ; %p1 = getelementptr i32, ptr %p, i32 1 %p2 = getelementptr i32, ptr %p, i32 2 @@ -799,25 +931,29 @@ } define i64 @loadCombine_i64(ptr %p) { -; ALL-LABEL: @loadCombine_i64( -; ALL-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = 
getelementptr i16, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 -; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2 -; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2 -; ALL-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2 -; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64 -; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64 -; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64 -; ALL-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64 -; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 32 -; ALL-NEXT: [[S4:%.*]] = shl i64 [[E4]], 48 -; ALL-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]] -; ALL-NEXT: ret i64 [[O3]] +; LE-LABEL: @loadCombine_i64( +; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2 +; LE-NEXT: ret i64 [[L1]] +; +; BE-LABEL: @loadCombine_i64( +; BE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 +; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2 +; BE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2 +; BE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2 +; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64 +; BE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64 +; BE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64 +; BE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64 +; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 16 +; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 32 +; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 48 +; BE-NEXT: [[O1:%.*]] = or i64 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]] +; BE-NEXT: ret i64 [[O3]] ; %p1 = getelementptr i16, ptr %p, i32 1 %p2 = getelementptr i16, ptr %p, i32 2 @@ -843,25 +979,29 @@ } define i64 @loadCombine_i64_BE(ptr %p) { -; ALL-LABEL: @loadCombine_i64_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 -; ALL-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2 -; ALL-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2 -; ALL-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2 -; ALL-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64 -; ALL-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64 -; ALL-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64 -; ALL-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64 -; ALL-NEXT: [[S1:%.*]] = shl i64 [[E1]], 48 -; ALL-NEXT: [[S2:%.*]] = shl i64 [[E2]], 32 -; ALL-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16 -; ALL-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i64 [[O2]], [[E4]] -; ALL-NEXT: ret i64 [[O3]] +; LE-LABEL: @loadCombine_i64_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 +; LE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2 +; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2 +; LE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2 +; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64 +; LE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64 +; LE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64 +; 
LE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64 +; LE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 48 +; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 32 +; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16 +; LE-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[E4]] +; LE-NEXT: ret i64 [[O3]] +; +; BE-LABEL: @loadCombine_i64_BE( +; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2 +; BE-NEXT: ret i64 [[L1]] ; %p1 = getelementptr i16, ptr %p, i32 1 %p2 = getelementptr i16, ptr %p, i32 2 @@ -976,46 +1116,51 @@ } define i64 @load64_farLoads(ptr %ptr) { -; ALL-LABEL: @load64_farLoads( -; ALL-NEXT: entry: -; ALL-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1 -; ALL-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64 -; ALL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 -; ALL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; ALL-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i64 -; ALL-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 8 -; ALL-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[SHL]] -; ALL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2 -; ALL-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 -; ALL-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i64 -; ALL-NEXT: [[SHL5:%.*]] = shl i64 [[CONV4]], 16 -; ALL-NEXT: [[OR6:%.*]] = or i64 [[OR]], [[SHL5]] -; ALL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 -; ALL-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 -; ALL-NEXT: [[CONV8:%.*]] = zext i8 [[TMP3]] to i64 -; ALL-NEXT: [[SHL9:%.*]] = shl i64 [[CONV8]], 24 -; ALL-NEXT: [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]] -; ALL-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4 -; ALL-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1 -; ALL-NEXT: [[CONV12:%.*]] = zext i8 [[TMP4]] to i64 -; ALL-NEXT: [[SHL13:%.*]] = shl i64 [[CONV12]], 32 -; ALL-NEXT: [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]] -; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5 -; ALL-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1 -; ALL-NEXT: [[CONV16:%.*]] = zext i8 [[TMP5]] to i64 -; ALL-NEXT: [[SHL17:%.*]] = shl i64 [[CONV16]], 40 -; ALL-NEXT: [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]] -; ALL-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6 -; ALL-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1 -; ALL-NEXT: [[CONV20:%.*]] = zext i8 [[TMP6]] to i64 -; ALL-NEXT: [[SHL21:%.*]] = shl i64 [[CONV20]], 48 -; ALL-NEXT: [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]] -; ALL-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7 -; ALL-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1 -; ALL-NEXT: [[CONV24:%.*]] = zext i8 [[TMP7]] to i64 -; ALL-NEXT: [[SHL25:%.*]] = shl i64 [[CONV24]], 56 -; ALL-NEXT: [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]] -; ALL-NEXT: ret i64 [[OR26]] +; LE-LABEL: @load64_farLoads( +; LE-NEXT: entry: +; LE-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1 +; LE-NEXT: ret i64 [[TMP0]] +; +; BE-LABEL: @load64_farLoads( +; BE-NEXT: entry: +; BE-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1 +; BE-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64 +; BE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 +; BE-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; BE-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i64 +; BE-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 8 +; BE-NEXT: [[OR:%.*]] = or i64 [[CONV]], [[SHL]] +; BE-NEXT: [[ARRAYIDX3:%.*]] = 
getelementptr inbounds i8, ptr [[PTR]], i64 2 +; BE-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 +; BE-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i64 +; BE-NEXT: [[SHL5:%.*]] = shl i64 [[CONV4]], 16 +; BE-NEXT: [[OR6:%.*]] = or i64 [[OR]], [[SHL5]] +; BE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 +; BE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 +; BE-NEXT: [[CONV8:%.*]] = zext i8 [[TMP3]] to i64 +; BE-NEXT: [[SHL9:%.*]] = shl i64 [[CONV8]], 24 +; BE-NEXT: [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]] +; BE-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4 +; BE-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1 +; BE-NEXT: [[CONV12:%.*]] = zext i8 [[TMP4]] to i64 +; BE-NEXT: [[SHL13:%.*]] = shl i64 [[CONV12]], 32 +; BE-NEXT: [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]] +; BE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5 +; BE-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1 +; BE-NEXT: [[CONV16:%.*]] = zext i8 [[TMP5]] to i64 +; BE-NEXT: [[SHL17:%.*]] = shl i64 [[CONV16]], 40 +; BE-NEXT: [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]] +; BE-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6 +; BE-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1 +; BE-NEXT: [[CONV20:%.*]] = zext i8 [[TMP6]] to i64 +; BE-NEXT: [[SHL21:%.*]] = shl i64 [[CONV20]], 48 +; BE-NEXT: [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]] +; BE-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7 +; BE-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1 +; BE-NEXT: [[CONV24:%.*]] = zext i8 [[TMP7]] to i64 +; BE-NEXT: [[SHL25:%.*]] = shl i64 [[CONV24]], 56 +; BE-NEXT: [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]] +; BE-NEXT: ret i64 [[OR26]] ; entry: %0 = load i8, ptr %ptr, align 1 @@ -1059,26 +1204,31 @@ } define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) { -; ALL-LABEL: @loadCombine_4consecutive_metadata( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0 -; ALL-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_metadata( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0 +; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_metadata( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0 +; BE-NEXT: [[L2:%.*]] 
= load i8, ptr [[P1]], align 1, !alias.scope !0 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0 +; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1152,3 +1302,301 @@ %o3 = or i16 %o2, %s4 ret i16 %o3 } + +define i32 @loadCombine_4consecutive_rev(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_rev( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L3]] to i32 +; LE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S2]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_rev( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s4, %s3 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %e1 + ret i32 %o3 +} + +define i64 @loadCombine_8consecutive_rev(ptr %p) { +; LE-LABEL: @loadCombine_8consecutive_rev( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; LE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 +; LE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 +; LE-NEXT: [[L1:%.*]] = load i8, ptr 
[[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 +; LE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 +; LE-NEXT: [[L7:%.*]] = load i16, ptr [[P6]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L7]] to i64 +; LE-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 48 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 +; LE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 +; LE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 +; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8 +; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16 +; LE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24 +; LE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32 +; LE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40 +; LE-NEXT: [[O6:%.*]] = or i64 [[TMP2]], [[S6]] +; LE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] +; LE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] +; LE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] +; LE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] +; LE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]] +; LE-NEXT: ret i64 [[O1]] +; +; BE-LABEL: @loadCombine_8consecutive_rev( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; BE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 +; BE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 +; BE-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 +; BE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 +; BE-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1 +; BE-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 +; BE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 +; BE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 +; BE-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64 +; BE-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64 +; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 24 +; BE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 32 +; BE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 40 +; BE-NEXT: [[S7:%.*]] = shl i64 [[E7]], 48 +; BE-NEXT: [[S8:%.*]] = shl i64 [[E8]], 56 +; BE-NEXT: [[O7:%.*]] = or i64 [[S8]], [[S7]] +; BE-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]] +; BE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] +; BE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] +; BE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] +; BE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] +; BE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[E1]] +; BE-NEXT: ret i64 [[O1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %p4 = getelementptr i8, ptr %p, i32 4 + %p5 = getelementptr i8, ptr %p, i32 5 + %p6 = getelementptr i8, ptr %p, i32 6 + %p7 = getelementptr i8, ptr %p, i32 7 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 
= load i8, ptr %p3 + %l5 = load i8, ptr %p4 + %l6 = load i8, ptr %p5 + %l7 = load i8, ptr %p6 + %l8 = load i8, ptr %p7 + + %e1 = zext i8 %l1 to i64 + %e2 = zext i8 %l2 to i64 + %e3 = zext i8 %l3 to i64 + %e4 = zext i8 %l4 to i64 + %e5 = zext i8 %l5 to i64 + %e6 = zext i8 %l6 to i64 + %e7 = zext i8 %l7 to i64 + %e8 = zext i8 %l8 to i64 + + %s2 = shl i64 %e2, 8 + %s3 = shl i64 %e3, 16 + %s4 = shl i64 %e4, 24 + %s5 = shl i64 %e5, 32 + %s6 = shl i64 %e6, 40 + %s7 = shl i64 %e7, 48 + %s8 = shl i64 %e8, 56 + + %o7 = or i64 %s8, %s7 + %o6 = or i64 %o7, %s6 + %o5 = or i64 %o6, %s5 + %o4 = or i64 %o5, %s4 + %o3 = or i64 %o4, %s3 + %o2 = or i64 %o3, %s2 + %o1 = or i64 %o2, %e1 + ret i64 %o1 +} + +define i64 @loadCombine_8consecutive_rev_BE(ptr %p) { +; LE-LABEL: @loadCombine_8consecutive_rev_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; LE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 +; LE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 +; LE-NEXT: [[P7:%.*]] = getelementptr i8, ptr [[P]], i32 7 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 +; LE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 +; LE-NEXT: [[L7:%.*]] = load i8, ptr [[P6]], align 1 +; LE-NEXT: [[L8:%.*]] = load i8, ptr [[P7]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i64 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 +; LE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 +; LE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 +; LE-NEXT: [[E7:%.*]] = zext i8 [[L7]] to i64 +; LE-NEXT: [[E8:%.*]] = zext i8 [[L8]] to i64 +; LE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56 +; LE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48 +; LE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40 +; LE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32 +; LE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24 +; LE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16 +; LE-NEXT: [[S7:%.*]] = shl i64 [[E7]], 8 +; LE-NEXT: [[O7:%.*]] = or i64 [[E8]], [[S7]] +; LE-NEXT: [[O6:%.*]] = or i64 [[O7]], [[S6]] +; LE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] +; LE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] +; LE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] +; LE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] +; LE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]] +; LE-NEXT: ret i64 [[O1]] +; +; BE-LABEL: @loadCombine_8consecutive_rev_BE( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; BE-NEXT: [[P5:%.*]] = getelementptr i8, ptr [[P]], i32 5 +; BE-NEXT: [[P6:%.*]] = getelementptr i8, ptr [[P]], i32 6 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L5:%.*]] = load i8, ptr [[P4]], align 1 +; BE-NEXT: [[L6:%.*]] = load i8, ptr [[P5]], align 1 +; BE-NEXT: [[L7:%.*]] = load i16, ptr [[P6]], align 1 +; BE-NEXT: [[TMP1:%.*]] = zext i16 [[L7]] to i64 +; BE-NEXT: 
[[E1:%.*]] = zext i8 [[L1]] to i64 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i64 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i64 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i64 +; BE-NEXT: [[E5:%.*]] = zext i8 [[L5]] to i64 +; BE-NEXT: [[E6:%.*]] = zext i8 [[L6]] to i64 +; BE-NEXT: [[S1:%.*]] = shl i64 [[E1]], 56 +; BE-NEXT: [[S2:%.*]] = shl i64 [[E2]], 48 +; BE-NEXT: [[S3:%.*]] = shl i64 [[E3]], 40 +; BE-NEXT: [[S4:%.*]] = shl i64 [[E4]], 32 +; BE-NEXT: [[S5:%.*]] = shl i64 [[E5]], 24 +; BE-NEXT: [[S6:%.*]] = shl i64 [[E6]], 16 +; BE-NEXT: [[O6:%.*]] = or i64 [[TMP1]], [[S6]] +; BE-NEXT: [[O5:%.*]] = or i64 [[O6]], [[S5]] +; BE-NEXT: [[O4:%.*]] = or i64 [[O5]], [[S4]] +; BE-NEXT: [[O3:%.*]] = or i64 [[O4]], [[S3]] +; BE-NEXT: [[O2:%.*]] = or i64 [[O3]], [[S2]] +; BE-NEXT: [[O1:%.*]] = or i64 [[O2]], [[S1]] +; BE-NEXT: ret i64 [[O1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %p4 = getelementptr i8, ptr %p, i32 4 + %p5 = getelementptr i8, ptr %p, i32 5 + %p6 = getelementptr i8, ptr %p, i32 6 + %p7 = getelementptr i8, ptr %p, i32 7 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + %l5 = load i8, ptr %p4 + %l6 = load i8, ptr %p5 + %l7 = load i8, ptr %p6 + %l8 = load i8, ptr %p7 + + %e1 = zext i8 %l1 to i64 + %e2 = zext i8 %l2 to i64 + %e3 = zext i8 %l3 to i64 + %e4 = zext i8 %l4 to i64 + %e5 = zext i8 %l5 to i64 + %e6 = zext i8 %l6 to i64 + %e7 = zext i8 %l7 to i64 + %e8 = zext i8 %l8 to i64 + + %s1 = shl i64 %e1, 56 + %s2 = shl i64 %e2, 48 + %s3 = shl i64 %e3, 40 + %s4 = shl i64 %e4, 32 + %s5 = shl i64 %e5, 24 + %s6 = shl i64 %e6, 16 + %s7 = shl i64 %e7, 8 + + %o7 = or i64 %e8, %s7 + %o6 = or i64 %o7, %s6 + %o5 = or i64 %o6, %s5 + %o4 = or i64 %o5, %s4 + %o3 = or i64 %o4, %s3 + %o2 = or i64 %o3, %s2 + %o1 = or i64 %o2, %s1 + ret i64 %o1 +} +