Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp =================================================================== --- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -427,11 +427,193 @@ return true; } +/// This is used by foldConsecutiveLoads() to capture a Root Load node which is +/// of type or(load, load) and recursively build the wide load. Also capture the +/// shift amount and loadSize. +struct LoadOps { + LoadInst *Root = nullptr; + bool FoundRoot = false; + uint64_t LoadSize = 0; + Value *Shift = nullptr; + Type *zext; +}; + +// Identify and Merge consecutive loads of the form +// 1. (zExt(L1) << shift1) | (zExt(L2) << shift2) -> zExt(L3) << shift1 +// 2. (? | (zExt(L1) << shift1)) | (zExt(L2) << shift2) +// -> ? | (zExt(L3) << shift1) +static bool foldConsecutiveLoads(Value *V, LoadOps &LOps, const DataLayout &DL, + AliasAnalysis *AA) { + Value *ShAmt2 = nullptr; + Value *X; + Instruction *L1, *L2; + + // Go to the last node with loads. + if (match(V, m_OneUse(m_c_Or( + m_Value(X), + m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))), + m_Value(ShAmt2)))))) || + match(V, m_OneUse(m_Or(m_Value(X), + m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))))))) + foldConsecutiveLoads(X, LOps, DL, AA); + else + return false; + + LoadInst *LI1, *LI2; + Value *ShAmt1 = nullptr; + + // Check if the pattern has loads + LI1 = LOps.Root; + ShAmt1 = LOps.Shift; + if (LOps.FoundRoot == false && + (match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) || + match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))), + m_Value(ShAmt1)))))) { + LI1 = dyn_cast(L1); + } + LI2 = dyn_cast(L2); + + // Check if loads are same, atomic or volatile. + if (LI1 == LI2 || !LI1 || !LI2 || (!LI1->isSimple() && !LI2->isSimple())) + return false; + + // Check if Loads come from same BB. + if (LI1->getParent() != LI2->getParent()) + return false; + + // Swap loads if LI1 comes later as we handle only forward loads. + if (!LI1->comesBefore(LI2)) { + std::swap(LI1, LI2); + std::swap(ShAmt1, ShAmt2); + } + + // Find the data layout + bool IsBigEndian = DL.isBigEndian(); + + // Check if loads are consecutive and same size. + uint64_t Offset1 = 0; + uint64_t Offset2 = 0; + Value *Load1Ptr = LI1->getOperand(0); + if (isa(Load1Ptr) || isa(Load1Ptr)) { + APInt Offset(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0); + Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, + /* AllowNonInbounds */ true); + Offset1 = Offset.getZExtValue(); + } + + Value *Load2Ptr = LI2->getOperand(0); + if (isa(Load2Ptr) || isa(Load2Ptr)) { + APInt Offset(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0); + Load2Ptr = Load2Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, + /* AllowNonInbounds */ true); + Offset2 = Offset.getZExtValue(); + } + + // Verify if both loads have same base pointers and load sizes are same. + uint64_t loadSize1 = DL.getTypeStoreSizeInBits(LI1->getType()); + uint64_t loadSize2 = DL.getTypeStoreSizeInBits(LI2->getType()); + if ((Load1Ptr != Load2Ptr) || (loadSize1 != loadSize2)) + return false; + + // Alias Analysis to check for store b/w the loads. + MemoryLocation Loc = MemoryLocation::get(LI2); + for (Instruction &Inst : make_range(LI1->getIterator(), LI2->getIterator())) { + if (Inst.mayWriteToMemory() && isModSet(AA->getModRefInfo(&Inst, Loc))) + return false; + } + + // Big endian swap the shifts + if (IsBigEndian) + std::swap(ShAmt1, ShAmt2); + + // Find Shifts values. + const APInt *Temp; + uint64_t Shift1 = 0, Shift2 = 0; + if (ShAmt1 && match(ShAmt1, m_APInt(Temp))) + Shift1 = Temp->getZExtValue(); + if (ShAmt2 && match(ShAmt2, m_APInt(Temp))) + Shift2 = Temp->getZExtValue(); + + if (LOps.FoundRoot) + loadSize1 = LOps.LoadSize; + + // Verify if shift amount and load index aligns and verifies that loads + // are consecutive. + uint64_t ShiftDiff = IsBigEndian ? loadSize2 : loadSize1; + uint64_t PrevSize = + DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), loadSize1)); + if (!(((Shift2 - Shift1) == ShiftDiff) && ((Offset2 - Offset1) == PrevSize))) + return false; + + // Update LOps + if (LOps.FoundRoot == false) { + LOps.FoundRoot = true; + LOps.LoadSize = loadSize1 + loadSize2; + } else + LOps.LoadSize = LOps.LoadSize + loadSize2; + + LOps.Root = LI1; + LOps.Shift = ShAmt1; + LOps.zext = X->getType(); + return true; +} + +static bool foldLoadsIterative(Instruction &I, const DataLayout &DL, + TargetTransformInfo &TTI, AliasAnalysis *AA) { + LoadOps LOps; + if (!foldConsecutiveLoads(&I, LOps, DL, AA) || !LOps.FoundRoot) + return false; + + IRBuilder<> Builder(&I); + LoadInst *NewLoad, *LI1; + LI1 = LOps.Root; + + // TTI based checks if we want to proceed with wider load + bool Allowed = + TTI.isTypeLegal(IntegerType::get(I.getContext(), LOps.LoadSize)); + if (!Allowed) + return false; + + unsigned AS = LI1->getPointerAddressSpace(); + bool Fast = false; + Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize, + AS, LI1->getAlign(), &Fast); + if (!Allowed || !Fast) + return false; + + // New load can be generated + Value *Load1Ptr = LI1->getPointerOperand(); + NewLoad = + new LoadInst(IntegerType::get(Load1Ptr->getContext(), LOps.LoadSize), + Load1Ptr, "", LI1->isVolatile(), LI1->getAlign(), + LI1->getOrdering(), LI1->getSyncScopeID()); + + NewLoad->takeName(LI1); + copyMetadataForLoad(*NewLoad, *LI1); + Builder.SetInsertPoint(LI1); + Builder.Insert(NewLoad); + Value *NewOp = NewLoad; + + // Check if zero extend needed. + if (LOps.zext) + NewOp = Builder.CreateZExt(NewOp, LOps.zext); + + // Check if shift needed. We need to shift with the amount of load1 + // shift if not zero. + if (LOps.Shift) + NewOp = Builder.CreateShl(NewOp, LOps.Shift); + I.replaceAllUsesWith(NewOp); + + return true; +} + /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. static bool foldUnusualPatterns(Function &F, DominatorTree &DT, - TargetTransformInfo &TTI) { + TargetTransformInfo &TTI, AliasAnalysis *AA) { bool MadeChange = false; for (BasicBlock &BB : F) { // Ignore unreachable basic blocks. @@ -448,6 +630,8 @@ MadeChange |= foldGuardedFunnelShift(I, DT); MadeChange |= tryToRecognizePopCount(I); MadeChange |= tryToFPToSat(I, TTI); + MadeChange |= + foldLoadsIterative(I, F.getParent()->getDataLayout(), TTI, AA); } } @@ -462,12 +646,13 @@ /// This is the entry point for all transforms. Pass manager differences are /// handled in the callers of this function. static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, - TargetLibraryInfo &TLI, DominatorTree &DT) { + TargetLibraryInfo &TLI, DominatorTree &DT, + AliasAnalysis *AA) { bool MadeChange = false; const DataLayout &DL = F.getParent()->getDataLayout(); TruncInstCombine TIC(AC, TLI, DL, DT); MadeChange |= TIC.run(F); - MadeChange |= foldUnusualPatterns(F, DT, TTI); + MadeChange |= foldUnusualPatterns(F, DT, TTI, AA); return MadeChange; } @@ -489,7 +674,8 @@ auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); auto &TTI = getAnalysis().getTTI(F); - return runImpl(F, AC, TTI, TLI, DT); + auto AA = &getAnalysis().getAAResults(); + return runImpl(F, AC, TTI, TLI, DT, AA); } PreservedAnalyses AggressiveInstCombinePass::run(Function &F, @@ -498,7 +684,8 @@ auto &TLI = AM.getResult(F); auto &DT = AM.getResult(F); auto &TTI = AM.getResult(F); - if (!runImpl(F, AC, TTI, TLI, DT)) { + auto *AA = &AM.getResult(F); + if (!runImpl(F, AC, TTI, TLI, DT, AA)) { // No changes, all analyses are preserved. return PreservedAnalyses::all(); } Index: llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll @@ -0,0 +1,730 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="e-n64" | FileCheck %s --check-prefixes=ALL,LE +; RUN: opt < %s -passes=aggressive-instcombine -S -mtriple aarch64 -data-layout="E-n64" | FileCheck %s --check-prefixes=ALL,BE + +define i16 @loadCombine_2consecutive(ptr %p) { +; +; ALL-LABEL: @loadCombine_2consecutive( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; ALL-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 +; ALL-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] +; ALL-NEXT: ret i16 [[O1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %e1 = zext i8 %l1 to i16 + %e2 = zext i8 %l2 to i16 + %s2 = shl i16 %e2, 8 + %o1 = or i16 %e1, %s2 + ret i16 %o1 +} + +define i16 @loadCombine_2consecutive_BE(ptr %p) { +; ALL-LABEL: @loadCombine_2consecutive_BE( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; ALL-NEXT: [[S1:%.*]] = shl i16 [[E1]], 8 +; ALL-NEXT: [[O1:%.*]] = or i16 [[S1]], [[E2]] +; ALL-NEXT: ret i16 [[O1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %e1 = zext i8 %l1 to i16 + %e2 = zext i8 %l2 to i16 + %s1 = shl i16 %e1, 8 + %o1 = or i16 %s1, %e2 + ret i16 %o1 +} + +define i32 @loadCombine_4consecutive(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive( +; LE-NEXT: [[TMP1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i32 [[TMP1]] +; +; BE-LABEL: @loadCombine_4consecutive( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_BE(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_BE( +; BE-NEXT: [[TMP1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: ret i32 [[TMP1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 24 + %s2 = shl i32 %e2, 16 + %s3 = shl i32 %e3, 8 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %e4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_alias( +; LE-NEXT: [[TMP1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: ret i32 [[TMP1]] +; +; BE-LABEL: @loadCombine_4consecutive_alias( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + store i8 10, i8* %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_alias_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias_BE( +; BE-NEXT: [[TMP1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: ret i32 [[TMP1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + store i8 10, i8* %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 24 + %s2 = shl i32 %e2, 16 + %s3 = shl i32 %e3, 8 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %e4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias2(ptr %p, ptr %pstr) { +; ALL-LABEL: @loadCombine_4consecutive_alias2( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %pstr + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias2_BE(ptr %p, ptr %pstr) { +; ALL-LABEL: @loadCombine_4consecutive_alias2_BE( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %pstr + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 24 + %s2 = shl i32 %e2, 16 + %s3 = shl i32 %e3, 8 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %e4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias3(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_alias3( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 10, ptr [[P3]], align 1 +; ALL-NEXT: store i8 5, ptr [[P]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %p3 + store i8 5, i8* %p + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias3_BE(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_alias3_BE( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 10, ptr [[P3]], align 1 +; ALL-NEXT: store i8 5, ptr [[P]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %p3 + store i8 5, i8* %p + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 24 + %s2 = shl i32 %e2, 16 + %s3 = shl i32 %e3, 8 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %e4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_with_alias4(ptr %p, ptr %ps) { +; ALL-LABEL: @loadCombine_4consecutive_with_alias4( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[PS1:%.*]] = getelementptr i8, ptr [[PS:%.*]], i32 1 +; ALL-NEXT: [[PS2:%.*]] = getelementptr i8, ptr [[PS]], i32 2 +; ALL-NEXT: [[PS3:%.*]] = getelementptr i8, ptr [[PS]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %ps1 = getelementptr i8, ptr %ps, i32 1 + %ps2 = getelementptr i8, ptr %ps, i32 2 + %ps3 = getelementptr i8, ptr %ps, i32 3 + %l1 = load i8, ptr %p + store i8 10, i8* %ps + %l2 = load i8, ptr %p1 + store i8 10, i8* %ps1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %ps2 + %l4 = load i8, ptr %p3 + store i8 10, i8* %ps3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +declare void @use(i8) +declare void @use2(i32) + +define i32 @loadCombine_4consecutive_hasOneUse1(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse1( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: call void @use(i8 [[L1]]) +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: call void @use(i8 [[L2]]) +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: call void @use(i8 [[L3]]) +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: call void @use(i8 [[L4]]) +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + call void @use(i8 %l1) + %l2 = load i8, ptr %p1 + call void @use(i8 %l2) + %l3 = load i8, ptr %p2 + call void @use(i8 %l3) + %l4 = load i8, ptr %p3 + call void @use(i8 %l4) + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_hasOneUse2(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse2( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: call void @use(i32 [[E1]]) +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: call void @use(i32 [[E2]]) +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: call void @use(i32 [[E3]]) +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: call void @use(i32 [[E4]]) +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + call void @use(i32 %e1) + %e2 = zext i8 %l2 to i32 + call void @use(i32 %e2) + %e3 = zext i8 %l3 to i32 + call void @use(i32 %e3) + %e4 = zext i8 %l4 to i32 + call void @use(i32 %e4) + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_hasOneUse3(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse3( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: call void @use(i32 [[S2]]) +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: call void @use(i32 [[S3]]) +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: call void @use(i32 [[S4]]) +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + call void @use(i32 %s2) + %s3 = shl i32 %e3, 16 + call void @use(i32 %s3) + %s4 = shl i32 %e4, 24 + call void @use(i32 %s4) + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_hasOneUse4(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse4( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: call void @use(i32 [[O1]]) +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: call void @use(i32 [[O2]]) +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + call void @use(i32 %o1) + %o2 = or i32 %o1, %s3 + call void @use(i32 %o2) + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_parLoad1(ptr %p) { +; ALL-LABEL: @loadCombine_parLoad1( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: ret i32 [[O2]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + ret i32 %o2 +} + Index: llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -0,0 +1,814 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="e-n64" -S | FileCheck %s --check-prefixes=ALL,LE +; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-none-eabi -mattr=avx2 -data-layout="E-n64" -S | FileCheck %s --check-prefixes=ALL,BE + +define i16 @loadCombine_2consecutive(ptr %p) { +; +; LE-LABEL: @loadCombine_2consecutive( +; LE-NEXT: [[TMP1:%.*]] = load i16, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i16 [[TMP1]] +; +; BE-LABEL: @loadCombine_2consecutive( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; BE-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 +; BE-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] +; BE-NEXT: ret i16 [[O1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %e1 = zext i8 %l1 to i16 + %e2 = zext i8 %l2 to i16 + %s2 = shl i16 %e2, 8 + %o1 = or i16 %e1, %s2 + ret i16 %o1 +} + +define i16 @loadCombine_2consecutive_BE(ptr %p) { +; LE-LABEL: @loadCombine_2consecutive_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; LE-NEXT: [[S1:%.*]] = shl i16 [[E1]], 8 +; LE-NEXT: [[O1:%.*]] = or i16 [[S1]], [[E2]] +; LE-NEXT: ret i16 [[O1]] +; +; BE-LABEL: @loadCombine_2consecutive_BE( +; BE-NEXT: [[TMP1:%.*]] = load i16, ptr [[P:%.*]], align 1 +; BE-NEXT: ret i16 [[TMP1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %e1 = zext i8 %l1 to i16 + %e2 = zext i8 %l2 to i16 + %s1 = shl i16 %e1, 8 + %o1 = or i16 %s1, %e2 + ret i16 %o1 +} + +define i32 @loadCombine_4consecutive(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive( +; LE-NEXT: [[TMP1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i32 [[TMP1]] +; +; BE-LABEL: @loadCombine_4consecutive( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_BE(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_BE( +; BE-NEXT: [[TMP1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: ret i32 [[TMP1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 24 + %s2 = shl i32 %e2, 16 + %s3 = shl i32 %e3, 8 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %e4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_alias( +; LE-NEXT: [[TMP1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: ret i32 [[TMP1]] +; +; BE-LABEL: @loadCombine_4consecutive_alias( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + store i8 10, i8* %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_alias_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias_BE( +; BE-NEXT: [[TMP1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: ret i32 [[TMP1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + store i8 10, i8* %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 24 + %s2 = shl i32 %e2, 16 + %s3 = shl i32 %e3, 8 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %e4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias2(ptr %p, ptr %pstr) { +; LE-LABEL: @loadCombine_4consecutive_alias2( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[TMP1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; LE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias2( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %pstr + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias2_BE(ptr %p, ptr %pstr) { +; LE-LABEL: @loadCombine_4consecutive_alias2_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias2_BE( +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[TMP1:%.*]] = load i16, ptr [[P]], align 1 +; BE-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 +; BE-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 16 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 10, ptr [[PSTR:%.*]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; BE-NEXT: [[O2:%.*]] = or i32 [[TMP3]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %pstr + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 24 + %s2 = shl i32 %e2, 16 + %s3 = shl i32 %e3, 8 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %e4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias3(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_alias3( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[TMP1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: store i8 10, ptr [[P3]], align 1 +; LE-NEXT: store i8 5, ptr [[P]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; LE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias3( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 10, ptr [[P3]], align 1 +; BE-NEXT: store i8 5, ptr [[P]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %p3 + store i8 5, i8* %p + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_alias3_BE(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_alias3_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: store i8 10, ptr [[P3]], align 1 +; LE-NEXT: store i8 5, ptr [[P]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias3_BE( +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[TMP1:%.*]] = load i16, ptr [[P]], align 1 +; BE-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 +; BE-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 16 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 10, ptr [[P3]], align 1 +; BE-NEXT: store i8 5, ptr [[P]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; BE-NEXT: [[O2:%.*]] = or i32 [[TMP3]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %p3 + store i8 5, i8* %p + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 24 + %s2 = shl i32 %e2, 16 + %s3 = shl i32 %e3, 8 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %e4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_with_alias4(ptr %p, ptr %ps) { +; ALL-LABEL: @loadCombine_4consecutive_with_alias4( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[PS1:%.*]] = getelementptr i8, ptr [[PS:%.*]], i32 1 +; ALL-NEXT: [[PS2:%.*]] = getelementptr i8, ptr [[PS]], i32 2 +; ALL-NEXT: [[PS3:%.*]] = getelementptr i8, ptr [[PS]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %ps1 = getelementptr i8, ptr %ps, i32 1 + %ps2 = getelementptr i8, ptr %ps, i32 2 + %ps3 = getelementptr i8, ptr %ps, i32 3 + %l1 = load i8, ptr %p + store i8 10, i8* %ps + %l2 = load i8, ptr %p1 + store i8 10, i8* %ps1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %ps2 + %l4 = load i8, ptr %p3 + store i8 10, i8* %ps3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +declare void @use(i8) +declare void @use2(i32) + +define i32 @loadCombine_4consecutive_hasOneUse1(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse1( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: call void @use(i8 [[L1]]) +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: call void @use(i8 [[L2]]) +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: call void @use(i8 [[L3]]) +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: call void @use(i8 [[L4]]) +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + call void @use(i8 %l1) + %l2 = load i8, ptr %p1 + call void @use(i8 %l2) + %l3 = load i8, ptr %p2 + call void @use(i8 %l3) + %l4 = load i8, ptr %p3 + call void @use(i8 %l4) + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_hasOneUse2(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse2( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: call void @use(i32 [[E1]]) +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: call void @use(i32 [[E2]]) +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: call void @use(i32 [[E3]]) +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: call void @use(i32 [[E4]]) +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + call void @use(i32 %e1) + %e2 = zext i8 %l2 to i32 + call void @use(i32 %e2) + %e3 = zext i8 %l3 to i32 + call void @use(i32 %e3) + %e4 = zext i8 %l4 to i32 + call void @use(i32 %e4) + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_hasOneUse3(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse3( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: call void @use(i32 [[S2]]) +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: call void @use(i32 [[S3]]) +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: call void @use(i32 [[S4]]) +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + call void @use(i32 %s2) + %s3 = shl i32 %e3, 16 + call void @use(i32 %s3) + %s4 = shl i32 %e4, 24 + call void @use(i32 %s4) + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_hasOneUse4(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse4( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: call void @use(i32 [[O1]]) +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: call void @use(i32 [[O2]]) +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + call void @use(i32 %o1) + %o2 = or i32 %o1, %s3 + call void @use(i32 %o2) + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_parLoad1(ptr %p) { +; LE-LABEL: @loadCombine_parLoad1( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[TMP1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S3]] +; LE-NEXT: ret i32 [[O2]] +; +; BE-LABEL: @loadCombine_parLoad1( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: ret i32 [[O2]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + ret i32 %o2 +} +