diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -610,6 +610,7 @@ /// shift amount, zero extend type and loadSize. struct LoadOps { LoadInst *Root = nullptr; + LoadInst *RootInsert = nullptr; bool FoundRoot = false; uint64_t LoadSize = 0; Value *Shift = nullptr; @@ -675,16 +676,6 @@ Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2, /* AllowNonInbounds */ true); - // Make sure Load with lower Offset is at LI1 - bool Reverse = false; - if (Offset2.slt(Offset1)) { - std::swap(LI1, LI2); - std::swap(ShAmt1, ShAmt2); - std::swap(Offset1, Offset2); - std::swap(Load1Ptr, Load2Ptr); - Reverse = true; - } - // Verify if both loads have same base pointers and load sizes are same. uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits(); uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits(); @@ -695,20 +686,36 @@ if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1)) return false; - // TODO: Alias Analysis to check for stores b/w the loads. - // Currently bail out if there are stores b/w the loads. - LoadInst *Start = LI1, *End = LI2; - if (!LI1->comesBefore(LI2)) + // Alias Analysis to check for stores b/w the loads. + LoadInst *Start = LOps.FoundRoot ? 
LOps.RootInsert : LI1, *End = LI2; + MemoryLocation Loc; + if (!Start->comesBefore(End)) { std::swap(Start, End); + Loc = MemoryLocation::get(End); + if (LOps.FoundRoot) + Loc = Loc.getWithNewSize(LOps.LoadSize); + } else + Loc = MemoryLocation::get(End); unsigned NumScanned = 0; for (Instruction &Inst : make_range(Start->getIterator(), End->getIterator())) { - if (Inst.mayWriteToMemory()) + if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc))) return false; if (++NumScanned > MaxInstrsToScan) return false; } + // Make sure Load with lower Offset is at LI1 + bool Reverse = false; + if (Offset2.slt(Offset1)) { + std::swap(LI1, LI2); + std::swap(ShAmt1, ShAmt2); + std::swap(Offset1, Offset2); + std::swap(Load1Ptr, Load2Ptr); + std::swap(LoadSize1, LoadSize2); + Reverse = true; + } + // Big endian swap the shifts if (IsBigEndian) std::swap(ShAmt1, ShAmt2); @@ -746,6 +753,7 @@ AATags1 = LI1->getAAMetadata(); } LOps.LoadSize = LoadSize1 + LoadSize2; + LOps.RootInsert = Start; // Concatenate the AATags of the Merged Loads. 
LOps.AATags = AATags1.concat(AATags2); @@ -780,10 +788,16 @@ AS, LI1->getAlign(), &Fast); if (!Allowed || !Fast) return false; + + // Make sure the Load pointer of type GEP/non-GEP is above insert point + Instruction *Inst = dyn_cast<Instruction>(LI1->getPointerOperand()); + if (Inst && Inst->getParent() == LI1->getParent() && + !Inst->comesBefore(LOps.RootInsert)) + Inst->moveBefore(LOps.RootInsert); // New load can be generated Value *Load1Ptr = LI1->getPointerOperand(); - Builder.SetInsertPoint(LI1); + Builder.SetInsertPoint(LOps.RootInsert); Value *NewPtr = Builder.CreateBitCast(Load1Ptr, WiderType->getPointerTo(AS)); NewLoad = Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(), LI1->isVolatile(), ""); diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll --- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll @@ -142,26 +142,31 @@ } define i32 @loadCombine_4consecutive_alias(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT:
[[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_alias( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -188,26 +193,31 @@ } define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = 
zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias_BE( +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: ret i32 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1760,13 +1770,65 @@ } define i32 @loadCombine_4consecutive_badinsert(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_badinsert( +; LE-LABEL: @loadCombine_4consecutive_badinsert( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 +; LE-NEXT: store i8 0, ptr [[P1]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; 
BE-LABEL: @loadCombine_4consecutive_badinsert( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 0, ptr [[P1]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + store i8 0, ptr %p1, align 1 + %l4 = load i8, ptr %p3 + %l1 = load i8, ptr %p + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_badinsert2( ; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 ; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 ; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 ; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 0, ptr [[P3]], align 1 ; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: store i8 0, ptr [[P1]], align 1 
; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 ; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 ; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 @@ -1785,8 +1847,8 @@ %p2 = getelementptr i8, ptr %p, i32 2 %p3 = getelementptr i8, ptr %p, i32 3 %l2 = load i8, ptr %p1 + store i8 0, ptr %p3, align 1 %l3 = load i8, ptr %p2 - store i8 0, ptr %p1, align 1 %l4 = load i8, ptr %p3 %l1 = load i8, ptr %p @@ -1805,16 +1867,143 @@ ret i32 %o3 } -define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_badinsert2( +define i32 @loadCombine_4consecutive_badinsert3(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_badinsert3( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P1]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert3( +; BE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 4 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P4]], align 1 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P]], i32 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p4 = getelementptr i8, ptr %p, i32 4 + %l4 = load i8, ptr %p4 + %e4 = zext i8 %l4 to i32 + %s4 = shl i32 %e4, 24 + + %p3 = getelementptr i8, ptr %p, i32 3 + %l3 = load i8, ptr %p3 + 
%e3 = zext i8 %l3 to i32 + %s3 = shl i32 %e3, 16 + + %p2 = getelementptr i8, ptr %p, i32 2 + %l2 = load i8, ptr %p2 + %e2 = zext i8 %l2 to i32 + %s2 = shl i32 %e2, 8 + + %p1 = getelementptr i8, ptr %p, i32 1 + %l1 = load i8, ptr %p1 + %e1 = zext i8 %l1 to i32 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + + +define i32 @loadCombine_4consecutive_badinsert4(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_badinsert4( +; LE-NEXT: entry: +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; LE-NEXT: [[C1:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[CMP:%.*]] = icmp eq i8 [[C1]], 0 +; LE-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[BB2:%.*]] +; LE: bb2: +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P1]], align 1 +; LE-NEXT: br label [[END]] +; LE: end: +; LE-NEXT: [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[L1]], [[BB2]] ] +; LE-NEXT: ret i32 [[COND]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert4( +; BE-NEXT: entry: +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[C1:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[CMP:%.*]] = icmp eq i8 [[C1]], 0 +; BE-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[BB2:%.*]] +; BE: bb2: +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[C2:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P4]], align 1 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; BE-NEXT: [[O1:%.*]] 
= or i32 [[S2]], [[C2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: br label [[END]] +; BE: end: +; BE-NEXT: [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[O3]], [[BB2]] ] +; BE-NEXT: ret i32 [[COND]] +; +entry: + %p1 = getelementptr i8, ptr %p, i64 1 + %c1 = load i8, ptr %p1, align 1 + %cmp = icmp eq i8 %c1, 0 + br i1 %cmp, label %end, label %bb2 + +bb2: + %l1 = load i8, ptr %p1, align 1 + %c2 = zext i8 %l1 to i32 + %p4 = getelementptr i8, ptr %p, i64 4 + %l4 = load i8, ptr %p4, align 1 + %e4 = zext i8 %l4 to i32 + %s4 = shl nuw i32 %e4, 24 + %p3 = getelementptr i8, ptr %p, i64 3 + %l3 = load i8, ptr %p3, align 1 + %e3 = zext i8 %l3 to i32 + %s3 = shl nuw nsw i32 %e3, 16 + %p2 = getelementptr i8, ptr %p, i64 2 + %l2 = load i8, ptr %p2, align 1 + %e2 = zext i8 %l2 to i32 + %s2 = shl nuw nsw i32 %e2, 8 + %o1 = or i32 %s2, %c2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + br label %end + +end: + %cond = phi i32 [ 0, %entry ], [ %o3, %bb2 ] + ret i32 %cond +} + +define i32 @loadCombine_4consecutive_badinsert5(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_badinsert5( ; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 ; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 ; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: store i8 0, ptr [[P3]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 ; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: store i8 0, ptr [[P2]], align 1 ; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 ; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 ; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 ; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 @@ -1830,11 +2019,11 @@ %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 %p3 = 
getelementptr i8, ptr %p, i32 3 - %l2 = load i8, ptr %p1 - store i8 0, ptr %p3, align 1 - %l3 = load i8, ptr %p2 %l4 = load i8, ptr %p3 + store i8 0, ptr %p2, align 1 %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 %e1 = zext i8 %l1 to i32 %e2 = zext i8 %l2 to i32 @@ -1850,3 +2039,49 @@ %o3 = or i32 %o2, %s4 ret i32 %o3 } + +define i32 @loadCombine_4consecutive_badinsert6(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_badinsert6( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 0, ptr [[P3]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S3]], [[S4]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + store i8 0, ptr %p3, align 1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s3, %s4 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %e1 + ret i32 %o3 +} diff --git 
a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll --- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -150,26 +150,31 @@ } define i32 @loadCombine_4consecutive_alias(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_alias( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: 
[[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -196,26 +201,31 @@ } define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr 
[[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias_BE( +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: ret i32 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1851,16 +1861,22 @@ } define i16 @loadCombine_2consecutive_badinsert(ptr %p) { -; ALL-LABEL: @loadCombine_2consecutive_badinsert( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: store i8 0, ptr [[P1]], align 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 -; ALL-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 -; ALL-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] -; ALL-NEXT: ret i16 [[O1]] +; LE-LABEL: @loadCombine_2consecutive_badinsert( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: store i8 0, ptr [[P1]], align 1 +; LE-NEXT: ret i16 [[L1]] +; +; BE-LABEL: @loadCombine_2consecutive_badinsert( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: store i8 0, ptr [[P1]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; 
BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; BE-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 +; BE-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] +; BE-NEXT: ret i16 [[O1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %l2 = load i8, ptr %p1 @@ -1874,26 +1890,32 @@ } define i32 @loadCombine_4consecutive_badinsert(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_badinsert( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: store i8 0, ptr [[P1]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_badinsert( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 +; LE-NEXT: store i8 0, ptr [[P1]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 0, ptr [[P1]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; 
BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1920,26 +1942,42 @@ } define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_badinsert2( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: store i8 0, ptr [[P3]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_badinsert2( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; LE-NEXT: store i8 
0, ptr [[P3]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; LE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert2( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: store i8 0, ptr [[P3]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1964,3 +2002,254 @@ %o3 = or i32 %o2, %s4 ret i32 %o3 } + +define i32 @loadCombine_4consecutive_badinsert3(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_badinsert3( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P1]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert3( +; BE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 4 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P4]], align 1 +; BE-NEXT: 
[[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P]], i32 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p4 = getelementptr i8, ptr %p, i32 4 + %l4 = load i8, ptr %p4 + %e4 = zext i8 %l4 to i32 + %s4 = shl i32 %e4, 24 + + %p3 = getelementptr i8, ptr %p, i32 3 + %l3 = load i8, ptr %p3 + %e3 = zext i8 %l3 to i32 + %s3 = shl i32 %e3, 16 + + %p2 = getelementptr i8, ptr %p, i32 2 + %l2 = load i8, ptr %p2 + %e2 = zext i8 %l2 to i32 + %s2 = shl i32 %e2, 8 + + %p1 = getelementptr i8, ptr %p, i32 1 + %l1 = load i8, ptr %p1 + %e1 = zext i8 %l1 to i32 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_badinsert4(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_badinsert4( +; LE-NEXT: entry: +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; LE-NEXT: [[C1:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[CMP:%.*]] = icmp eq i8 [[C1]], 0 +; LE-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[BB2:%.*]] +; LE: bb2: +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P1]], align 1 +; LE-NEXT: br label [[END]] +; LE: end: +; LE-NEXT: [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[L1]], [[BB2]] ] +; LE-NEXT: ret i32 [[COND]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert4( +; BE-NEXT: 
entry: +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; BE-NEXT: [[C1:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[CMP:%.*]] = icmp eq i8 [[C1]], 0 +; BE-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[BB2:%.*]] +; BE: bb2: +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[C2:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P4]], align 1 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8 +; BE-NEXT: [[O1:%.*]] = or i32 [[S2]], [[C2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: br label [[END]] +; BE: end: +; BE-NEXT: [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[O3]], [[BB2]] ] +; BE-NEXT: ret i32 [[COND]] +; +entry: + %p1 = getelementptr i8, ptr %p, i64 1 + %c1 = load i8, ptr %p1, align 1 + %cmp = icmp eq i8 %c1, 0 + br i1 %cmp, label %end, label %bb2 + +bb2: + %l1 = load i8, ptr %p1, align 1 + %c2 = zext i8 %l1 to i32 + %p4 = getelementptr i8, ptr %p, i64 4 + %l4 = load i8, ptr %p4, align 1 + %e4 = zext i8 %l4 to i32 + %s4 = shl nuw i32 %e4, 24 + %p3 = getelementptr i8, ptr %p, i64 3 + %l3 = load i8, ptr %p3, align 1 + %e3 = zext i8 %l3 to i32 + %s3 = shl nuw nsw i32 %e3, 16 + %p2 = getelementptr i8, ptr %p, i64 2 + %l2 = load i8, ptr %p2, align 1 + %e2 = zext i8 %l2 to i32 + %s2 = shl nuw nsw i32 %e2, 8 + %o1 = or i32 %s2, %c2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + br label %end + +end: + %cond = phi i32 [ 
0, %entry ], [ %o3, %bb2 ] + ret i32 %cond +} + +define i32 @loadCombine_4consecutive_badinsert5(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_badinsert5( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: store i8 0, ptr [[P2]], align 1 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; LE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert5( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: store i8 0, ptr [[P2]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l4 = load i8, ptr %p3 + store i8 0, ptr %p2, 
align 1 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + %l3 = load i8, ptr %p2 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} + +define i32 @loadCombine_4consecutive_badinsert6(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_badinsert6( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: store i8 0, ptr [[P3]], align 1 +; LE-NEXT: [[L3:%.*]] = load i16, ptr [[P2]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L3]] to i32 +; LE-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 16 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP2]], [[S2]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert6( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: store i8 0, ptr [[P3]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: 
[[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[S3]], [[S4]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l1 = load i8, ptr %p + %l2 = load i8, ptr %p1 + store i8 0, ptr %p3, align 1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %s3, %s4 + %o2 = or i32 %o1, %s2 + %o3 = or i32 %o2, %e1 + ret i32 %o3 +}