Index: llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -11,8 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PatternMatch.h"
@@ -3110,6 +3112,162 @@
         Builder.CreateOr(C, Builder.CreateAnd(A, B)), D);
   }
 
+  // Identify and merge consecutive loads of the form
+  //   1. (zext(L1) << shift1) | (zext(L2) << shift2) -> zext(L3) << shift1
+  //   2. (? | (zext(L1) << shift1)) | (zext(L2) << shift2)
+  //        -> ? | (zext(L3) << shift1)
+  // where L1 and L2 are consecutive loads and L3 is the equivalent wider
+  // load.
+  Value *ShAmt2 = nullptr;
+  Instruction *L1, *L2;
+  // Check for a load in one of the operands.
+  if (match(&I,
+            m_c_Or(m_Value(X),
+                   m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
+                                  m_Value(ShAmt2))))) ||
+      match(&I, m_c_Or(m_Value(X),
+                       m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2))))))) {
+    Value *ShAmt1 = nullptr;
+    Value *Hold = nullptr;
+
+    // Check whether X also contains a load.
+    if ((match(X, m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1))))) ||
+         match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
+                                 m_Value(ShAmt1)))) ||
+         match(X,
+               m_OneUse(m_c_Or(
+                   m_Value(Hold),
+                   m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
+                                  m_Value(ShAmt1)))))))) {
+
+      // Bail out if the loads are identical, or if either load is atomic or
+      // volatile.
+      LoadInst *LI1 = dyn_cast<LoadInst>(L1);
+      LoadInst *LI2 = dyn_cast<LoadInst>(L2);
+      if ((LI1 == LI2) || !LI1 || !LI2 || !LI1->isSimple() ||
+          !LI2->isSimple())
+        return nullptr;
+
+      // Find the constant shift amounts.
+      const APInt *Temp;
+      uint64_t Shift1 = 0, Shift2 = 0;
+      if (ShAmt1 && match(ShAmt1, m_APInt(Temp)))
+        Shift1 = Temp->getZExtValue();
+      if (ShAmt2 && match(ShAmt2, m_APInt(Temp)))
+        Shift2 = Temp->getZExtValue();
+
+      // Make LI1 the load at the lower address: on little-endian targets that
+      // is the load feeding the smaller shift, on big-endian targets the one
+      // feeding the larger shift.
+      bool IsBigEndian = DL.isBigEndian();
+
+      if (Shift1 > Shift2) {
+        if (!IsBigEndian)
+          std::swap(LI1, LI2);
+        std::swap(Shift1, Shift2);
+        std::swap(ShAmt1, ShAmt2);
+      } else {
+        if (IsBigEndian)
+          std::swap(LI1, LI2);
+      }
+
+      // Check that the loads are consecutive and have the same size. The
+      // second load must come from a GEP; extract its base and index.
+      Value *Op1 = LI1->getOperand(0);
+      Value *Op2 = LI2->getOperand(0);
+      if (isa<GetElementPtrInst>(Op2)) {
+        GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(Op2);
+        // Parse the constant index of the second load's GEP.
+        uint64_t Idx2 = 0;
+        uint64_t Src2Type =
+            DL.getTypeStoreSizeInBits(GEP2->getSourceElementType());
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP2->getOperand(1)))
+          Idx2 = CI->getZExtValue();
+
+        // Check whether the first load is a GEP as well.
+        Value *Load1Ptr;
+        uint64_t Idx1 = 0;
+        uint64_t Src1Type = 0;
+        GetElementPtrInst *GEP1 = nullptr;
+        if (isa<GetElementPtrInst>(Op1)) {
+          GEP1 = cast<GetElementPtrInst>(Op1);
+          Load1Ptr = GEP1->getPointerOperand();
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP1->getOperand(1)))
+            Idx1 = CI->getZExtValue();
+          Src1Type = DL.getTypeStoreSizeInBits(GEP1->getSourceElementType());
+          if (Src1Type != Src2Type)
+            return nullptr;
+        } else
+          // Otherwise this is a direct load from the base pointer.
+          Load1Ptr = LI1->getPointerOperand();
+
+        // Verify that both loads have the same base pointer and load size.
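+        // For example, two adjacent i8 loads at GEP indices 0 and 1 feeding
+        // shifts of 0 and 8 pass the checks below: the load size is 8 bits,
+        // Shift2 - Shift1 == 8, and (Idx2 - Idx1) * 8 == 8.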
+        uint64_t LoadSize1 = DL.getTypeStoreSizeInBits(LI1->getType());
+        uint64_t LoadSize2 = DL.getTypeStoreSizeInBits(LI2->getType());
+        if ((Load1Ptr != GEP2->getPointerOperand()) ||
+            (LoadSize1 != LoadSize2))
+          return nullptr;
+
+        // Verify that the shift amounts and load indices line up, i.e. that
+        // the loads are consecutive.
+        if (!(((Shift2 - Shift1) == LoadSize1) &&
+              (((Idx2 * Src2Type) - (Idx1 * Src1Type)) == LoadSize1)))
+          return nullptr;
+
+        // Make sure there is no store or other clobber between the two loads
+        // (uses alias analysis via FindAvailableLoadedValue).
+        bool IsLoadCSE = false;
+        if (LI1->comesBefore(LI2)) {
+          BasicBlock::iterator BBIt(LI2);
+          if (!FindAvailableLoadedValue(LI1, LI2->getParent(), BBIt,
+                                        DefMaxInstsToScan, AA, &IsLoadCSE))
+            return nullptr;
+        } else {
+          BasicBlock::iterator BBIt(LI1);
+          if (!FindAvailableLoadedValue(LI2, LI1->getParent(), BBIt,
+                                        DefMaxInstsToScan, AA, &IsLoadCSE))
+            return nullptr;
+        }
+
+        // Create the new, wider load.
+        LoadInst *NewLoad;
+        if (!GEP1)
+          NewLoad = new LoadInst(
+              IntegerType::get(Load1Ptr->getContext(), LoadSize1 * 2),
+              Load1Ptr, "", LI1->isVolatile(), LI1->getAlign(),
+              LI1->getOrdering(), LI1->getSyncScopeID());
+        else
+          NewLoad =
+              new LoadInst(IntegerType::get(GEP1->getContext(), LoadSize1 * 2),
+                           GEP1, "", LI1->isVolatile(), LI1->getAlign(),
+                           LI1->getOrdering(), LI1->getSyncScopeID());
+
+        // Attempt to improve the alignment of the new load.
+        Value *Op = LI1->getOperand(0);
+        Align KnownAlign = getOrEnforceKnownAlignment(
+            Op, DL.getPrefTypeAlign(LI1->getType()), DL, LI1, nullptr, &DT);
+        if (KnownAlign > NewLoad->getAlign())
+          NewLoad->setAlignment(KnownAlign);
+
+        NewLoad->takeName(LI1);
+        copyMetadataForLoad(*NewLoad, *LI1);
+        InsertNewInstWith(NewLoad, *LI1);
+
+        // Zero-extend the new load if it is narrower than the result type.
+        Value *NewOp = NewLoad;
+        if (NewLoad->getType() != X->getType())
+          NewOp = Builder.CreateZExt(NewLoad, X->getType());
+
+        // Shift the result left by the first load's shift amount, if it is
+        // non-zero.
+ if (Shift1 != 0) + NewOp = Builder.CreateShl(NewOp, ShAmt1); + + if (!Hold) { + Value *Zero = Constant::getNullValue(X->getType()); + return BinaryOperator::CreateOr(NewOp, Zero); + } else + return BinaryOperator::CreateOr(Hold, NewOp); + } + } + } + return nullptr; } Index: llvm/test/Transforms/InstCombine/or-load.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/or-load.ll @@ -0,0 +1,1020 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S -data-layout="e-n64" | FileCheck %s --check-prefixes=ALL,LE +; RUN: opt < %s -passes=instcombine -S -data-layout="E-n64" | FileCheck %s --check-prefixes=ALL,BE + +define i16 @loadCombine_2consecutive(ptr %p) { +; LE-LABEL: @loadCombine_2consecutive( +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i16 [[L1]] +; +; BE-LABEL: @loadCombine_2consecutive( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; BE-NEXT: [[S2:%.*]] = shl nuw i16 [[E2]], 8 +; BE-NEXT: [[O1:%.*]] = or i16 [[S2]], [[E1]] +; BE-NEXT: ret i16 [[O1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + + %e1 = zext i8 %l1 to i16 + %e2 = zext i8 %l2 to i16 + + %s1 = shl i16 %e1, 0 + %s2 = shl i16 %e2, 8 + + %o1 = or i16 %s1, %s2 + ret i16 %o1 +} + +define i16 @loadCombine_2consecutive_BE(ptr %p) { +; LE-LABEL: @loadCombine_2consecutive_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; LE-NEXT: [[S1:%.*]] = shl nuw i16 [[E1]], 8 +; LE-NEXT: [[O1:%.*]] = or i16 [[S1]], [[E2]] +; LE-NEXT: ret i16 [[O1]] +; +; BE-LABEL: @loadCombine_2consecutive_BE( +; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P:%.*]], align 1 +; BE-NEXT: ret i16 [[L1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + + %e1 = zext i8 %l1 to i16 + %e2 = zext i8 %l2 to i16 + + %s1 = shl i16 %e1, 8 + %s2 = shl i16 %e2, 0 + + %o1 = or i16 %s1, %s2 + ret i16 %o1 +} + +define i32 @loadCombine_4consecutive(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret 
i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 0 + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_rev(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_rev( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_rev( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 0 + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s4, %s3 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %s1 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_rev2(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_rev2( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_rev2( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; BE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; BE-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; BE-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l4 = load i8, ptr %p3 + %l3 = load i8, ptr %p2 + %l2 = load i8, ptr %p1 + %l1 = load i8, ptr %p + + %e4 
= zext i8 %l4 to i32 + %e3 = zext i8 %l3 to i32 + %e2 = zext i8 %l2 to i32 + %e1 = zext i8 %l1 to i32 + + %s4 = shl i32 %e4, 24 + %s3 = shl i32 %e3, 16 + %s2 = shl i32 %e2, 8 + %s1 = shl i32 %e1, 0 + + %o1 = or i32 %s4, %s3 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %s1 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_BE(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl nuw i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_BE( +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: ret i32 [[L1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 24 + %s2 = shl i32 %e2, 16 + %s3 = shl i32 %e3, 8 + %s4 = shl i32 %e4, 0 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_BE_rev(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_BE_rev( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl nuw i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S3]], [[E4]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S1]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_BE_rev( +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: ret i32 [[L1]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 24 + %s2 = shl i32 %e2, 16 + %s3 = shl i32 %e3, 8 + %s4 = shl i32 %e4, 0 + + %o1 = or i32 %s4, %s3 + %o2 = or i32 
%o1, %s2 + %o3 = or i32 %o2, %s1 + ret i32 %o3 +} + +define i64 @loadCombine_4consecutive_16bit(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_16bit( +; LE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2 +; LE-NEXT: ret i64 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_16bit( +; BE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i64 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i64 3 +; BE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 +; BE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2 +; BE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2 +; BE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2 +; BE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64 +; BE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64 +; BE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64 +; BE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64 +; BE-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[E2]], 16 +; BE-NEXT: [[S3:%.*]] = shl nuw nsw i64 [[E3]], 32 +; BE-NEXT: [[S4:%.*]] = shl nuw i64 [[E4]], 48 +; BE-NEXT: [[O1:%.*]] = or i64 [[S2]], [[E1]] +; BE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S4]] +; BE-NEXT: ret i64 [[O3]] +; + %p1 = getelementptr i16, ptr %p, i32 1 + %p2 = getelementptr i16, ptr %p, i32 2 + %p3 = getelementptr i16, ptr %p, i32 3 + %l1 = load i16, ptr %p + %l2 = load i16, ptr %p1 + %l3 = load i16, ptr %p2 + %l4 = load i16, ptr %p3 + + %e1 = zext i16 %l1 to i64 + %e2 = zext i16 %l2 to i64 + %e3 = zext i16 %l3 to i64 + %e4 = zext i16 %l4 to i64 + + %s1 = shl i64 %e1, 0 + %s2 = shl i64 %e2, 16 + %s3 = shl i64 %e3, 32 + %s4 = shl i64 %e4, 48 + + %o1 = or i64 %s1, %s2 + %o2 = or i64 %o1, %s3 + %o3 = or i64 %o2, %s4 + ret i64 %o3 +} + +define i64 @loadCombine_4consecutive_16bit_BE(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_16bit_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i64 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i64 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i64 3 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 2 +; LE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 2 +; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 2 +; LE-NEXT: [[L4:%.*]] = load i16, ptr [[P3]], align 2 +; LE-NEXT: [[E1:%.*]] = zext i16 [[L1]] to i64 +; LE-NEXT: [[E2:%.*]] = zext i16 [[L2]] to i64 +; LE-NEXT: [[E3:%.*]] = zext i16 [[L3]] to i64 +; LE-NEXT: [[E4:%.*]] = zext i16 [[L4]] to i64 +; LE-NEXT: [[S1:%.*]] = shl nuw i64 [[E1]], 48 +; LE-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[E2]], 32 +; LE-NEXT: [[S3:%.*]] = shl nuw nsw i64 [[E3]], 16 +; LE-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i64 [[O2]], [[E4]] +; LE-NEXT: ret i64 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_16bit_BE( +; BE-NEXT: [[L1:%.*]] = load i64, ptr [[P:%.*]], align 2 +; BE-NEXT: ret i64 [[L1]] +; + %p1 = getelementptr i16, ptr %p, i32 1 + %p2 = getelementptr i16, ptr %p, i32 2 + %p3 = getelementptr i16, ptr %p, i32 3 + %l1 = load i16, ptr %p + %l2 = load i16, ptr %p1 + %l3 = load i16, ptr %p2 + %l4 = load i16, ptr %p3 + + %e1 = zext i16 %l1 to i64 + %e2 = zext i16 %l2 to i64 + %e3 = zext i16 %l3 to i64 + %e4 = zext i16 %l4 to i64 + + %s1 = shl i64 %e1, 48 + %s2 = shl i64 %e2, 32 + %s3 = shl i64 %e3, 16 + %s4 = shl i64 %e4, 0 + + %o1 = or i64 %s1, %s2 + %o2 = or i64 %o1, %s3 + %o3 = or i64 %o2, %s4 + ret i64 %o3 +} + + +define i32 @loadCombine_4consecutive_with_alias1(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_with_alias1( +; 
LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; LE-NEXT: [[TMP2:%.*]] = zext i16 [[L3]] to i32 +; LE-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 16 +; LE-NEXT: [[O3:%.*]] = or i32 [[TMP3]], [[TMP1]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_with_alias1( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + store i8 10, i8* %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 0 + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_with_alias2(ptr %p, ptr %ps) { +; LE-LABEL: @loadCombine_4consecutive_with_alias2( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: store i8 10, ptr [[PS:%.*]], align 1 +; LE-NEXT: [[L2:%.*]] = load i16, ptr [[P1]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L2]] to i32 +; LE-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 8 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[E1]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_with_alias2( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: store i8 10, ptr [[PS:%.*]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl nuw nsw i32 
[[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + store i8 10, i8* %ps + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 0 + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_with_alias3(ptr %p, ptr %ps) { +; ALL-LABEL: @loadCombine_4consecutive_with_alias3( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; ALL-NEXT: [[PS1:%.*]] = getelementptr i8, ptr [[PS:%.*]], i64 1 +; ALL-NEXT: [[PS2:%.*]] = getelementptr i8, ptr [[PS]], i64 2 +; ALL-NEXT: [[PS3:%.*]] = getelementptr i8, ptr [[PS]], i64 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: store i8 10, ptr [[PS3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %ps1 = getelementptr i8, ptr %ps, i32 1 + %ps2 = getelementptr i8, ptr %ps, i32 2 + %ps3 = getelementptr i8, ptr %ps, i32 3 + %l1 = load i8, ptr %p + store i8 10, i8* %ps + %l2 = load i8, ptr %p1 + store i8 10, i8* %ps1 + %l3 = load i8, ptr %p2 + store i8 10, i8* %ps2 + %l4 = load i8, ptr %p3 + store i8 10, i8* %ps3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 0 + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +; Function Attrs: nounwind uwtable +define i64 @Load64(ptr %ptr) { +; LE-LABEL: @Load64( +; LE-NEXT: entry: +; LE-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1 +; LE-NEXT: ret i64 [[TMP0]] +; +; BE-LABEL: @Load64( +; BE-NEXT: entry: +; BE-NEXT: [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1 +; BE-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64 +; BE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 +; BE-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; BE-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i64 +; BE-NEXT: 
[[SHL:%.*]] = shl nuw nsw i64 [[CONV2]], 8 +; BE-NEXT: [[OR:%.*]] = or i64 [[SHL]], [[CONV]] +; BE-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2 +; BE-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 +; BE-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i64 +; BE-NEXT: [[SHL5:%.*]] = shl nuw nsw i64 [[CONV4]], 16 +; BE-NEXT: [[OR6:%.*]] = or i64 [[OR]], [[SHL5]] +; BE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 +; BE-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 +; BE-NEXT: [[CONV8:%.*]] = zext i8 [[TMP3]] to i64 +; BE-NEXT: [[SHL9:%.*]] = shl nuw nsw i64 [[CONV8]], 24 +; BE-NEXT: [[OR10:%.*]] = or i64 [[OR6]], [[SHL9]] +; BE-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4 +; BE-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1 +; BE-NEXT: [[CONV12:%.*]] = zext i8 [[TMP4]] to i64 +; BE-NEXT: [[SHL13:%.*]] = shl nuw nsw i64 [[CONV12]], 32 +; BE-NEXT: [[OR14:%.*]] = or i64 [[OR10]], [[SHL13]] +; BE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5 +; BE-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1 +; BE-NEXT: [[CONV16:%.*]] = zext i8 [[TMP5]] to i64 +; BE-NEXT: [[SHL17:%.*]] = shl nuw nsw i64 [[CONV16]], 40 +; BE-NEXT: [[OR18:%.*]] = or i64 [[OR14]], [[SHL17]] +; BE-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6 +; BE-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX19]], align 1 +; BE-NEXT: [[CONV20:%.*]] = zext i8 [[TMP6]] to i64 +; BE-NEXT: [[SHL21:%.*]] = shl nuw nsw i64 [[CONV20]], 48 +; BE-NEXT: [[OR22:%.*]] = or i64 [[OR18]], [[SHL21]] +; BE-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7 +; BE-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX23]], align 1 +; BE-NEXT: [[CONV24:%.*]] = zext i8 [[TMP7]] to i64 +; BE-NEXT: [[SHL25:%.*]] = shl nuw i64 [[CONV24]], 56 +; BE-NEXT: [[OR26:%.*]] = or i64 [[OR22]], [[SHL25]] +; BE-NEXT: ret i64 [[OR26]] +; +entry: + %0 = load i8, ptr %ptr, align 1 + %conv = zext i8 %0 to i64 + %arrayidx1 = getelementptr inbounds i8, ptr %ptr, i64 1 + %1 = load i8, ptr %arrayidx1, align 1 + %conv2 = zext i8 %1 to i64 + %shl = shl i64 %conv2, 8 + %or = or i64 %conv, %shl + %arrayidx3 = getelementptr inbounds i8, ptr %ptr, i64 2 + %2 = load i8, ptr %arrayidx3, align 1 + %conv4 = zext i8 %2 to i64 + %shl5 = shl i64 %conv4, 16 + %or6 = or i64 %or, %shl5 + %arrayidx7 = getelementptr inbounds i8, ptr %ptr, i64 3 + %3 = load i8, ptr %arrayidx7, align 1 + %conv8 = zext i8 %3 to i64 + %shl9 = shl i64 %conv8, 24 + %or10 = or i64 %or6, %shl9 + %arrayidx11 = getelementptr inbounds i8, ptr %ptr, i64 4 + %4 = load i8, ptr %arrayidx11, align 1 + %conv12 = zext i8 %4 to i64 + %shl13 = shl i64 %conv12, 32 + %or14 = or i64 %or10, %shl13 + %arrayidx15 = getelementptr inbounds i8, ptr %ptr, i64 5 + %5 = load i8, ptr %arrayidx15, align 1 + %conv16 = zext i8 %5 to i64 + %shl17 = shl i64 %conv16, 40 + %or18 = or i64 %or14, %shl17 + %arrayidx19 = getelementptr inbounds i8, ptr %ptr, i64 6 + %6 = load i8, ptr %arrayidx19, align 1 + %conv20 = zext i8 %6 to i64 + %shl21 = shl i64 %conv20, 48 + %or22 = or i64 %or18, %shl21 + %arrayidx23 = getelementptr inbounds i8, ptr %ptr, i64 7 + %7 = load i8, ptr %arrayidx23, align 1 + %conv24 = zext i8 %7 to i64 + %shl25 = shl i64 %conv24, 56 + %or26 = or i64 %or22, %shl25 + ret i64 %or26 +} + +declare void @use(i8) +declare void @use2(i32) + +define i32 @loadCombine_4consecutive_hasOneUse1(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse1( +; ALL-NEXT: [[P1:%.*]] = 
getelementptr i8, ptr [[P:%.*]], i64 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: call void @use(i8 [[L1]]) +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: call void @use(i8 [[L2]]) +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: call void @use(i8 [[L3]]) +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: call void @use(i8 [[L4]]) +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + call void @use(i8 %l1) + %l2 = load i8, ptr %p1 + call void @use(i8 %l2) + %l3 = load i8, ptr %p2 + call void @use(i8 %l3) + %l4 = load i8, ptr %p3 + call void @use(i8 %l4) + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 0 + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_hasOneUse2(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse2( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: call void @use(i32 [[E1]]) +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: call void @use(i32 [[E2]]) +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: call void @use(i32 [[E3]]) +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: call void @use(i32 [[E4]]) +; ALL-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + call void @use(i32 %e1) + %e2 = zext i8 %l2 to i32 + call void @use(i32 %e2) + %e3 = zext i8 %l3 to i32 + call void @use(i32 %e3) + %e4 = zext i8 %l4 to i32 + call void @use(i32 %e4) + + %s1 = shl i32 %e1, 0 + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_hasOneUse3(ptr %p) { 
+; ALL-LABEL: @loadCombine_4consecutive_hasOneUse3( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: call void @use(i32 [[E1]]) +; ALL-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; ALL-NEXT: call void @use(i32 [[S2]]) +; ALL-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; ALL-NEXT: call void @use(i32 [[S3]]) +; ALL-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; ALL-NEXT: call void @use(i32 [[S4]]) +; ALL-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 0 + call void @use(i32 %s1) + %s2 = shl i32 %e2, 8 + call void @use(i32 %s2) + %s3 = shl i32 %e3, 16 + call void @use(i32 %s3) + %s4 = shl i32 %e4, 24 + call void @use(i32 %s4) + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_hasOneUse4(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_hasOneUse4( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; LE-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; LE-NEXT: call void @use(i32 [[TMP1]]) +; LE-NEXT: [[O2:%.*]] = or i32 [[S3]], [[TMP1]] +; LE-NEXT: call void @use(i32 [[O2]]) +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_hasOneUse4( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; BE-NEXT: call void @use(i32 [[O1]]) +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: call void 
@use(i32 [[O2]]) +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s1 = shl i32 %e1, 0 + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s1, %s2 + call void @use(i32 %o1) + %o2 = or i32 %o1, %s3 + call void @use(i32 %o2) + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_hasOneUse5(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_hasOneUse5( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: call void @use(i8 [[L1]]) +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: call void @use(i8 [[L2]]) +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: call void @use(i8 [[L3]]) +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: call void @use(i8 [[L4]]) +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: call void @use(i32 [[E1]]) +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: call void @use(i32 [[E2]]) +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: call void @use(i32 [[E3]]) +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: call void @use(i32 [[E4]]) +; ALL-NEXT: call void @use(i32 [[E1]]) +; ALL-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; ALL-NEXT: call void @use(i32 [[S2]]) +; ALL-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; ALL-NEXT: call void @use(i32 [[S3]]) +; ALL-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; ALL-NEXT: call void @use(i32 [[S4]]) +; ALL-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; ALL-NEXT: call void @use(i32 [[O1]]) +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: call void @use(i32 [[O2]]) +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + call void @use(i8 %l1) + %l2 = load i8, ptr %p1 + call void @use(i8 %l2) + %l3 = load i8, ptr %p2 + call void @use(i8 %l3) + %l4 = load i8, ptr %p3 + call void @use(i8 %l4) + + %e1 = zext i8 %l1 to i32 + call void @use(i32 %e1) + %e2 = zext i8 %l2 to i32 + call void @use(i32 %e2) + %e3 = zext i8 %l3 to i32 + call void @use(i32 %e3) + %e4 = zext i8 %l4 to i32 + call void @use(i32 %e4) + + %s1 = shl i32 %e1, 0 + call void @use(i32 %s1) + %s2 = shl i32 %e2, 8 + call void @use(i32 %s2) + %s3 = shl i32 %e3, 16 + call void @use(i32 %s3) + %s4 = shl i32 %e4, 24 + call void @use(i32 %s4) + + %o1 = or i32 %s1, %s2 + call void @use(i32 %o1) + %o2 = or i32 %o1, %s3 + call void @use(i32 %o2) + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_parLoad1(ptr %p) { +; LE-LABEL: @loadCombine_parLoad1( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; LE-NEXT: 
[[O2:%.*]] = or i32 [[S3]], [[TMP1]] +; LE-NEXT: ret i32 [[O2]] +; +; BE-LABEL: @loadCombine_parLoad1( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; BE-NEXT: [[O1:%.*]] = or i32 [[S2]], [[E1]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: ret i32 [[O2]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + + %s1 = shl i32 %e1, 0 + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + + %o1 = or i32 %s1, %s2 + %o2 = or i32 %o1, %s3 + ret i32 %o2 +}
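+
+; Illustrative sketch: the patterns tested above typically come from portable,
+; byte-wise loads written in C. A hypothetical source (assumed here purely for
+; illustration) that lowers to the @Load64 pattern is:
+;
+;   uint64_t Load64(const uint8_t *p) {
+;     return  (uint64_t)p[0]        | ((uint64_t)p[1] << 8)  |
+;            ((uint64_t)p[2] << 16) | ((uint64_t)p[3] << 24) |
+;            ((uint64_t)p[4] << 32) | ((uint64_t)p[5] << 40) |
+;            ((uint64_t)p[6] << 48) | ((uint64_t)p[7] << 56);
+;   }
+;
+; With this combine, the little-endian RUN line folds the resulting
+; zext/shl/or chain into a single "load i64, ptr %ptr, align 1" (see the LE
+; checks in @Load64 above).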