Index: llvm/include/llvm/Transforms/Scalar/SROA.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/SROA.h +++ llvm/include/llvm/Transforms/Scalar/SROA.h @@ -123,6 +123,7 @@ AssumptionCache &RunAC); bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS); + bool presplitOverlappedSlices(AllocaInst &AI, sroa::AllocaSlices &AS); AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS, sroa::Partition &P); bool splitAlloca(AllocaInst &AI, sroa::AllocaSlices &AS); Index: llvm/lib/Transforms/Scalar/SROA.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SROA.cpp +++ llvm/lib/Transforms/Scalar/SROA.cpp @@ -3628,6 +3628,12 @@ return SubTy; } +// For each load/store record the corresponding slice and split positions. +struct SplitOffsets { + Slice *S; + std::vector Splits; +}; + /// Pre-split loads and stores to simplify rewriting. /// /// We want to break up the splittable load+store pairs as much as @@ -3672,10 +3678,6 @@ // can find them via a direct lookup. This is important to cross-check loads // and stores against each other. We also track the slice so that we can kill // all the slices that end up split. - struct SplitOffsets { - Slice *S; - std::vector Splits; - }; SmallDenseMap SplitOffsetsMap; // Track loads out of this alloca which cannot, for any reason, be pre-split. 
@@ -3909,7 +3911,7 @@ NewSlices.push_back( Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, &PLoad->getOperandUse(PLoad->getPointerOperandIndex()), - /*IsSplittable*/ false)); + /*IsSplittable*/ true)); LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() << ", " << NewSlices.back().endOffset() << "): " << *PLoad << "\n"); @@ -4058,7 +4060,7 @@ NewSlices.push_back( Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, &PStore->getOperandUse(PStore->getPointerOperandIndex()), - /*IsSplittable*/ false)); + /*IsSplittable*/ true)); LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() << ", " << NewSlices.back().endOffset() << "): " << *PStore << "\n"); @@ -4135,6 +4137,239 @@ return true; } +// Limit the number of times presplitOverlappedSlices is called. +#define MAX_PRESPLIT_ITERATIONS 128 + +/// Pre-split overlapped AllocaSlices like the following to simplify rewriting. +/// +/// S1 ------ +/// S2 ------ +/// +/// Here we want to split S1 at the begin offset of S2. So it changes to +/// +/// S11 --- +/// S12 --- +/// S2 ------ +/// +/// \returns true if any changes are made. +bool SROA::presplitOverlappedSlices(AllocaInst &AI, sroa::AllocaSlices &AS) { + LLVM_DEBUG(dbgs() << "Pre-splitting overlapped slices\n"); + + // Track the loads and stores which are candidates for splitting. + SmallVector Loads; + SmallVector Stores; + SmallDenseMap SplitOffsetsMap; + + for (auto &P : AS.partitions()) { + bool Found = false; + for (Slice &S1 : P) { + if (!S1.isSplittable()) + continue; + for (Slice &S2 : P) { + // We are interested in the following case only: + // + // S1 ------ + // S2 ------ + if ((S1.beginOffset() >= S2.beginOffset()) || + (S1.endOffset() >= S2.endOffset()) || + (S1.endOffset() <= S2.beginOffset())) + continue; + + // Found the overlapped case, record the instruction. 
+ Instruction *I = cast(S1.getUse()->getUser()); + if (auto *LI = dyn_cast(I)) { + assert(!LI->isVolatile() && "Cannot split volatile loads!"); + Loads.push_back(LI); + } else if (auto *SI = dyn_cast(I)) { + if (S1.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex())) + // Skip stores *of* pointers. + continue; + assert(!SI->isVolatile() && "Cannot split volatile stores!"); + Stores.push_back(SI); + } else { + // Other uses cannot be pre-split. + continue; + } + + // We can split S1 at the position S2.beginOffset(). + LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n"); + auto &Offsets = SplitOffsetsMap[I]; + assert(Offsets.Splits.empty()); + Offsets.S = &S1; + Offsets.Splits.push_back(S2.beginOffset() - S1.beginOffset()); + + Found = true; + break; + } + + if (Found) + break; + } + } + + // Collect the new slices which we will merge into the alloca slices. + SmallVector NewSlices; + std::vector SplitInsts; + IRBuilderTy IRB(&AI); + const DataLayout &DL = AI.getModule()->getDataLayout(); + + for (LoadInst *LI : Loads) { + SplitInsts.clear(); + + IntegerType *Ty = cast(LI->getType()); + uint64_t LoadSize = Ty->getBitWidth() / 8; + + auto &Offsets = SplitOffsetsMap[LI]; + assert(LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() && + "Slice size should always match load size exactly!"); + uint64_t BaseOffset = Offsets.S->beginOffset(); + Instruction *BasePtr = cast(LI->getPointerOperand()); + + auto AS = LI->getPointerAddressSpace(); + IRB.SetInsertPoint(LI); + + assert(Offsets.Splits.size() == 1); + uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); + for (int i=0; i<2; i++) { + auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); + auto *PartPtrTy = PartTy->getPointerTo(AS); + LoadInst *PLoad = IRB.CreateAlignedLoad( + PartTy, + getAdjustedPtr(IRB, DL, BasePtr, + APInt(DL.getIndexSizeInBits(AS), PartOffset), + PartPtrTy, BasePtr->getName() + "."), + getAdjustedAlignment(LI, PartOffset, DL), + /*IsVolatile*/ false, 
LI->getName()); + PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_access_group}); + + // Record the part load so later we can combine the loaded values into a + // single integer. + SplitInsts.push_back(PLoad); + + // Now build a new slice for the alloca. + NewSlices.push_back( + Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, + &PLoad->getOperandUse(PLoad->getPointerOperandIndex()), + /*IsSplittable*/ true)); + LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() + << ", " << NewSlices.back().endOffset() + << "): " << *PLoad << "\n"); + + // Setup the next partition. + PartOffset = PartSize; + PartSize = LoadSize - PartSize; + } + + // Combine the 2 loaded values into a single integer. + Value *V1 = IRB.CreateZExt(SplitInsts[0], Ty, LI->getName() + ".ext.0"); + Value *V2 = IRB.CreateZExt(SplitInsts[1], Ty, LI->getName() + ".ext.1"); + + PartSize = Offsets.Splits.front(); + if (DL.isBigEndian()) { + uint64_t ShAmt = 8 * (LoadSize - PartSize); + V1 = IRB.CreateShl(V1, ShAmt, LI->getName() + ".shift"); + } else { + uint64_t ShAmt = 8 * PartSize; + V2 = IRB.CreateShl(V2, ShAmt, LI->getName() + ".shift"); + } + + Value *V = IRB.CreateOr(V1, V2, LI->getName() + ".or"); + LI->replaceAllUsesWith(V); + + // Mark the original load as dead and kill the original slice. 
+ DeadInsts.insert(LI); + Offsets.S->kill(); + } + + for (StoreInst *SI : Stores) { + SplitInsts.clear(); + IRB.SetInsertPoint(SI); + + auto *V = SI->getValueOperand(); + IntegerType *Ty = cast(V->getType()); + uint64_t StoreSize = Ty->getBitWidth() / 8; + + auto &Offsets = SplitOffsetsMap[SI]; + assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() && + "Slice size should always match load size exactly!"); + uint64_t BaseOffset = Offsets.S->beginOffset(); + Instruction *StoreBasePtr = cast(SI->getPointerOperand()); + + assert(Offsets.Splits.size() == 1); + uint64_t PartSize = Offsets.Splits.front(); + + // Split the store value into 2 parts. + auto *LowTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); + auto *HighTy = Type::getIntNTy(Ty->getContext(), + (StoreSize - PartSize) * 8); + + auto *V1 = V; + auto *V2 = V; + if (DL.isBigEndian()) { + uint64_t ShAmt = 8 * (StoreSize - PartSize); + V1 = IRB.CreateLShr(V1, ShAmt, SI->getName() + ".shift"); + } else { + uint64_t ShAmt = 8 * PartSize; + V2 = IRB.CreateLShr(V2, ShAmt, SI->getName() + ".shift"); + } + + V1 = IRB.CreateTrunc(V1, LowTy, SI->getName() + ".trunc.0"); + V2 = IRB.CreateTrunc(V2, HighTy, SI->getName() + ".trunc.1"); + SplitInsts.push_back(V1); + SplitInsts.push_back(V2); + + // Now we can store the 2 parts. + auto AS = SI->getPointerAddressSpace(); + uint64_t PartOffset = 0; + for (int i=0; i<2; i++) { + Value *SV = SplitInsts[i]; + auto *PartTy = SV->getType(); + auto *StorePartPtrTy = PartTy->getPointerTo(AS); + + StoreInst *PStore = IRB.CreateAlignedStore(SV, + getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getIndexSizeInBits(AS), PartOffset), + StorePartPtrTy, StoreBasePtr->getName() + "."), + getAdjustedAlignment(SI, PartOffset, DL), + /*IsVolatile*/ false); + + // Build a new slice for the alloca. 
+ NewSlices.push_back( + Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, + &PStore->getOperandUse(PStore->getPointerOperandIndex()), + /*IsSplittable*/ true)); + LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() + << ", " << NewSlices.back().endOffset() + << "): " << *PStore << "\n"); + + // Setup the next part. + PartOffset = PartSize; + PartSize = StoreSize - PartSize; + } + + // Mark the original store as dead and kill the original slice. + DeadInsts.insert(SI); + Offsets.S->kill(); + } + + // Remove the killed slices that have been pre-split. + AS.erase(llvm::remove_if(AS, [](const Slice &S) { return S.isDead(); }), + AS.end()); + + // Insert our new slices. This will sort and merge them into the sorted + // sequence. + AS.insert(NewSlices); + + LLVM_DEBUG(dbgs() << " Pre-split slices:\n"); +#ifndef NDEBUG + for (auto I = AS.begin(), E = AS.end(); I != E; ++I) + LLVM_DEBUG(AS.print(dbgs(), I, " ")); +#endif + + return SplitOffsetsMap.size() > 0; +} + /// Rewrite an alloca partition's users. /// /// This routine drives both of the rewriting goals of the SROA pass. It tries @@ -4297,6 +4532,13 @@ // First try to pre-split loads and stores. Changed |= presplitLoadsAndStores(AI, AS); + int PresplitTimes = 0; + bool LocalChanged = true; + while (LocalChanged && PresplitTimes < MAX_PRESPLIT_ITERATIONS) { + LocalChanged = presplitOverlappedSlices(AI, AS); + Changed |= LocalChanged; + PresplitTimes++; + } // Now that we have identified any pre-splitting opportunities, // mark loads and stores unsplittable except for the following case. Index: llvm/test/Transforms/SROA/basictest.ll =================================================================== --- llvm/test/Transforms/SROA/basictest.ll +++ llvm/test/Transforms/SROA/basictest.ll @@ -103,14 +103,15 @@ ; Avoid crashing when load/storing at at different offsets. 
define i64 @test2_addrspacecast_gep_offset(i64 %X) { ; CHECK-LABEL: @test2_addrspacecast_gep_offset( -; CHECK: %A.sroa.0 = alloca [10 x i8] -; CHECK: [[GEP0:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %A.sroa.0, i16 0, i16 2 -; CHECK-NEXT: [[GEP1:%.*]] = addrspacecast i8* [[GEP0]] to i64 addrspace(1)* -; CHECK-NEXT: store i64 %X, i64 addrspace(1)* [[GEP1]], align 1 +; CHECK: %A.sroa.1.32.extract.trunc = trunc i64 %X to i48 +; CHECK-NEXT: %A.sroa.3.32.extract.shift = lshr i64 %X, 48 +; CHECK-NEXT: %A.sroa.3.32.extract.trunc = trunc i64 %A.sroa.3.32.extract.shift to i16 ; CHECK: br -; CHECK: [[BITCAST:%.*]] = bitcast [10 x i8]* %A.sroa.0 to i64* -; CHECK: %A.sroa.0.0.A.sroa.0.30.Z = load i64, i64* [[BITCAST]], align 1 +; CHECK: %Z.ext.0 = zext i16 undef to i64 +; CHECK-NEXT: %Z.ext.1 = zext i48 %A.sroa.1.32.extract.trunc to i64 +; CHECK-NEXT: %Z.shift = shl i64 %Z.ext.1, 16 +; CHECK-NEXT: %Z.or = or i64 %Z.ext.0, %Z.shift ; CHECK-NEXT: ret entry: %A = alloca [256 x i8] @@ -134,10 +135,7 @@ ; CHECK-NOT: alloca ; CHECK: %[[test3_a1:.*]] = alloca [42 x i8] ; CHECK-NEXT: %[[test3_a2:.*]] = alloca [99 x i8] -; CHECK-NEXT: %[[test3_a3:.*]] = alloca [16 x i8] ; CHECK-NEXT: %[[test3_a4:.*]] = alloca [42 x i8] -; CHECK-NEXT: %[[test3_a5:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test3_a6:.*]] = alloca [7 x i8] ; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8] %b = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 0 @@ -150,19 +148,86 @@ ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_0:!.*]] ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 142 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 16, 
{{.*}}), !tbaa [[TAG_0:!.*]] +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 2, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 143 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 1, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 144 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 8, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 145 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 1, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 146 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 2, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 147 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 1, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 148 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 4, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 149 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 1, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 150 +; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep_src]] to i64* +; CHECK-NEXT: %[[src150:.*]] = load i64, i64* %[[bitcast]], align 2, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[src150_trunc:.*]] = trunc i64 %[[src150]] to i56 +; CHECK-NEXT: %[[src150_trunc_trunc:.*]] = trunc i56 %[[src150_trunc]] to i48 +; CHECK-NEXT: %[[src150_trunc_trunc_trunc:.*]] = trunc i48 %[[src150_trunc_trunc]] to i40 +; CHECK-NEXT: %[[src150_trunc_trunc_trunc_trunc:.*]] = trunc i40 %[[src150_trunc_trunc_trunc]] to i32 +; CHECK-NEXT: %[[src150_trunc_trunc_trunc_trunc_trunc:.*]] = trunc i32 %[[src150_trunc_trunc_trunc_trunc]] to i24 +; CHECK-NEXT: %[[src150_trunc_trunc_trunc_trunc_trunc_trunc:.*]] = trunc 
i24 %[[src150_trunc_trunc_trunc_trunc_trunc]] to i16 +; CHECK-NEXT: %[[dummy:.*]] = trunc i16 %[[src150_trunc_trunc_trunc_trunc_trunc_trunc]] to i8 +; CHECK-NEXT: %[[src150_trunc_trunc_trunc_trunc_trunc_trunc_lshr:.*]] = lshr i16 %[[src150_trunc_trunc_trunc_trunc_trunc_trunc]], 8 +; CHECK-NEXT: %[[dummy:.*]] = trunc i16 %[[src150_trunc_trunc_trunc_trunc_trunc_trunc_lshr]] to i8 +; CHECK-NEXT: %[[src150_trunc_trunc_trunc_trunc_trunc_lshr:.*]] = lshr i24 %[[src150_trunc_trunc_trunc_trunc_trunc]], 16 +; CHECK-NEXT: %[[dummy:.*]] = trunc i24 %[[src150_trunc_trunc_trunc_trunc_trunc_lshr]] to i8 +; CHECK-NEXT: %[[src150_trunc_trunc_trunc_trunc_lshr:.*]] = lshr i32 %[[src150_trunc_trunc_trunc_trunc]], 24 +; CHECK-NEXT: %[[dummy:.*]] = trunc i32 %[[src150_trunc_trunc_trunc_trunc_lshr]] to i8 +; CHECK-NEXT: %[[src150_trunc_trunc_trunc_lshr:.*]] = lshr i40 %[[src150_trunc_trunc_trunc]], 32 +; CHECK-NEXT: %[[dummy:.*]] = trunc i40 %[[src150_trunc_trunc_trunc_lshr]] to i8 +; CHECK-NEXT: %[[src150_trunc_trunc_lshr:.*]] = lshr i48 %[[src150_trunc_trunc]], 40 +; CHECK-NEXT: %[[dummy:.*]] = trunc i48 %[[src150_trunc_trunc_lshr]] to i8 +; CHECK-NEXT: %[[src150_trunc_lshr:.*]] = lshr i56 %[[src150_trunc]], 48 +; CHECK-NEXT: %[[dummy:.*]] = trunc i56 %[[src150_trunc_lshr]] to i8 +; CHECK-NEXT: %[[src150_lshr:.*]] = lshr i64 %[[src150]], 56 +; CHECK-NEXT: %[[dummy:.*]] = trunc i64 %[[src150_lshr]] to i8 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 158 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_0:!.*]] ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 200 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 
%[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]] +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 8, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 201 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 1, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 202 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 2, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 203 +; CHECK-NEXT: %[[bc_src:.*]] = bitcast i8* %[[gep_src]] to i32* +; CHECK-NEXT: %[[i32_203:.*]] = load i32, i32* %[[bc_src]], align 1, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[i32_203_trunc:.*]] = trunc i32 %[[i32_203]] to i24 +; CHECK-NEXT: %[[i32_203_trunc_trunc:.*]] = trunc i24 %[[i32_203_trunc]] to i16 +; CHECK-NEXT: %[[dummy:.*]] = trunc i16 %[[i32_203_trunc_trunc]] to i8 +; CHECK-NEXT: %[[i32_203_trunc_trunc_lshr:.*]] = lshr i16 %[[i32_203_trunc_trunc]], 8 +; CHECK-NEXT: %[[dummy:.*]] = trunc i16 %[[i32_203_trunc_trunc_lshr]] to i8 +; CHECK-NEXT: %[[i32_203_trunc_lshr:.*]] = lshr i24 %[[i32_203_trunc]], 16 +; CHECK-NEXT: %[[dummy:.*]] = trunc i24 %[[i32_203_trunc_lshr]] to i8 +; CHECK-NEXT: %[[i32_203_lshr:.*]] = lshr i32 %[[i32_203]], 24 +; CHECK-NEXT: %[[dummy:.*]] = trunc i32 %[[i32_203_lshr]] to i8 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 207 ; CHECK-NEXT: %[[test3_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]] ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 208 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]] +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 8, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 209 +; CHECK-NEXT: 
%[[dummy:.*]] = load i8, i8* %[[gep_src]], align 1, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 210 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 2, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 211 +; CHECK-NEXT: %[[bc_src:.*]] = bitcast i8* %[[gep_src]] to i32* +; CHECK-NEXT: %[[i32_211:.*]] = load i32, i32* %[[bc_src]], align 1, !tbaa [[TAG_0]] +; CHECK-NEXT: %[[i32_211_trunc:.*]] = trunc i32 %[[i32_211]] to i24 +; CHECK-NEXT: %[[i32_211_trunc_trunc:.*]] = trunc i24 %[[i32_211_trunc]] to i16 +; CHECK-NEXT: %[[dummy:.*]] = trunc i16 %[[i32_211_trunc_trunc]] to i8 +; CHECK-NEXT: %[[i32_211_trunc_trunc_lshr:.*]] = lshr i16 %[[i32_211_trunc_trunc]], 8 +; CHECK-NEXT: %[[dummy:.*]] = trunc i16 %[[i32_211_trunc_trunc_lshr]] to i8 +; CHECK-NEXT: %[[i32_211_trunc_lshr:.*]] = lshr i24 %[[i32_211_trunc]], 16 +; CHECK-NEXT: %[[dummy:.*]] = trunc i24 %[[i32_211_trunc_lshr]] to i8 +; CHECK-NEXT: %[[i32_211_lshr:.*]] = lshr i32 %[[i32_211]], 24 +; CHECK-NEXT: %[[dummy:.*]] = trunc i32 %[[i32_211_lshr]] to i8 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 215 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}), !tbaa [[TAG_0:!.*]] @@ -194,49 +259,17 @@ %overlap.8.i64 = bitcast i8* %overlap.8.i8 to i64* %overlap.9.i64 = bitcast i8* %overlap.9.i8 to i64* store i8 1, i8* %overlap.1.i8, !tbaa !3 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0 -; CHECK-NEXT: store i8 1, i8* %[[gep]], align 1, !tbaa [[TAG_3:!.*]] store i16 1, i16* %overlap.1.i16, !tbaa !5 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i16* -; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_5:!.*]] store i32 1, i32* 
%overlap.1.i32, !tbaa !7 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_7:!.*]] store i64 1, i64* %overlap.1.i64, !tbaa !9 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i64* -; CHECK-NEXT: store i64 1, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_9:!.*]] store i64 2, i64* %overlap.2.i64, !tbaa !11 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 2, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_11:!.*]] store i64 3, i64* %overlap.3.i64, !tbaa !13 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 2 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 3, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_13:!.*]] store i64 4, i64* %overlap.4.i64, !tbaa !15 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 3 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 4, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_15:!.*]] store i64 5, i64* %overlap.5.i64, !tbaa !17 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 4 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 5, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_17:!.*]] store i64 6, i64* %overlap.6.i64, !tbaa !19 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 5 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 6, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_19:!.*]] store i64 7, i64* %overlap.7.i64, !tbaa !21 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 6 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; 
CHECK-NEXT: store i64 7, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_21:!.*]] store i64 8, i64* %overlap.8.i64, !tbaa !23 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 7 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 8, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_23:!.*]] store i64 9, i64* %overlap.9.i64, !tbaa !25 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 8 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 9, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_25:!.*]] ; Make two sequences of overlapping stores with more gaps and irregularities. %overlap2.1.0.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 200 @@ -255,26 +288,11 @@ %overlap2.1.2.i32 = bitcast i8* %overlap2.1.2.i8 to i32* %overlap2.1.3.i32 = bitcast i8* %overlap2.1.3.i8 to i32* store i8 1, i8* %overlap2.1.0.i8, !tbaa !27 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: store i8 1, i8* %[[gep]], align 1, !tbaa [[TAG_27:!.*]] store i16 1, i16* %overlap2.1.0.i16, !tbaa !29 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i16* -; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_29:!.*]] store i32 1, i32* %overlap2.1.0.i32, !tbaa !31 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_31:!.*]] store i32 2, i32* %overlap2.1.1.i32, !tbaa !33 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 2, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_33:!.*]] store i32 3, i32* %overlap2.1.2.i32, !tbaa !35 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 2 -; CHECK-NEXT: 
%[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 3, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_35:!.*]] store i32 4, i32* %overlap2.1.3.i32, !tbaa !37 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 3 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 4, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_37:!.*]] %overlap2.2.0.i32 = bitcast i8* %overlap2.2.0.i8 to i32* %overlap2.2.1.i16 = bitcast i8* %overlap2.2.1.i8 to i16* @@ -282,53 +300,62 @@ %overlap2.2.2.i32 = bitcast i8* %overlap2.2.2.i8 to i32* %overlap2.2.3.i32 = bitcast i8* %overlap2.2.3.i8 to i32* store i32 1, i32* %overlap2.2.0.i32, !tbaa !39 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a6]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_39:!.*]] store i8 1, i8* %overlap2.2.1.i8, !tbaa !41 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: store i8 1, i8* %[[gep]], align 1, !tbaa [[TAG_41:!.*]] store i16 1, i16* %overlap2.2.1.i16, !tbaa !43 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_43:!.*]] store i32 1, i32* %overlap2.2.1.i32, !tbaa !45 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_45:!.*]] store i32 3, i32* %overlap2.2.2.i32, !tbaa !47 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 2 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 3, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_47:!.*]] store i32 4, i32* %overlap2.2.3.i32, !tbaa !49 -; CHECK-NEXT: %[[gep:.*]] = 
getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 3 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 4, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_49:!.*]] %overlap2.prefix = getelementptr i8, i8* %overlap2.1.1.i8, i64 -4 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i1 false), !tbaa !51 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 39 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %src, i32 3, {{.*}}), !tbaa [[TAG_51:!.*]] ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 3 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 5, {{.*}}), !tbaa [[TAG_51]] +; CHECK-NEXT: %[[i8_3:.*]] = load i8, i8* %[[gep_src]], align 1, !tbaa [[TAG_51]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 4 +; CHECK-NEXT: %[[i8_4:.*]] = load i8, i8* %[[gep_src]], align 1, !tbaa [[TAG_51]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 5 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 1, !tbaa [[TAG_51]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 6 +; CHECK-NEXT: %[[bc_src:.*]] = bitcast i8* %[[gep_src]] to i16* +; CHECK-NEXT: %[[i16_6:.*]] = load i16, i16* %[[bc_src]], align 1, !tbaa [[TAG_51]] +; CHECK-NEXT: %[[dummy:.*]] = trunc i16 %[[i16_6]] to i8 +; CHECK-NEXT: %[[i16_6_lshr:.*]] = lshr i16 %[[i16_6]], 8 +; CHECK-NEXT: %[[dummy:.*]] = trunc i16 %[[i16_6_lshr]] to i8 ; Bridge between the overlapping areas call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i1 false), !tbaa !53 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 2 -; CHECK-NEXT: call void 
@llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 5, {{.*}}), !tbaa [[TAG_53:!.*]] -; ...promoted i8 store... -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 2, {{.*}}), !tbaa [[TAG_53]] ; Entirely within the second overlap. call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i1 false), !tbaa !55 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5, {{.*}}), !tbaa [[TAG_55:!.*]] +; CHECK-NEXT: %[[i8_0:.*]] = load i8, i8* %src, align 1, !tbaa [[TAG_55:!.*]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 1 +; CHECK-NEXT: %[[dummy:.*]] = load i8, i8* %[[gep_src]], align 1, !tbaa [[TAG_55]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 2 +; CHECK-NEXT: %[[bc_src:.*]] = bitcast i8* %[[gep_src]] to i24* +; CHECK-NEXT: %[[i24_2:.*]] = load i24, i24* %[[bc_src]], align 1, !tbaa [[TAG_55]] +; CHECK-NEXT: %[[i24_2_trunc:.*]] = trunc i24 %[[i24_2]] to i16 +; CHECK-NEXT: %[[dummy:.*]] = trunc i16 %[[i24_2_trunc]] to i8 +; CHECK-NEXT: %[[i24_2_trunc_lshr:.*]] = lshr i16 %[[i24_2_trunc]], 8 +; CHECK-NEXT: %[[dummy:.*]] = trunc i16 %[[i24_2_trunc_lshr]] to i8 +; CHECK-NEXT: %[[i24_2_lshr:.*]] = lshr i24 %[[i24_2]], 16 +; CHECK-NEXT: %[[dummy:.*]] = trunc i24 %[[i24_2_lshr]] to i8 ; Trailing past the second overlap. 
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i1 false), !tbaa !57 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 2 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5, {{.*}}), !tbaa [[TAG_57:!.*]] +; CHECK-NEXT: %[[i8_0_210:.*]] = load i8, i8* %src, align 1, !tbaa [[TAG_57:!.*]] +; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 1 +; CHECK-NEXT: %[[bc_src:.*]] = bitcast i8* %[[gep_src]] to i32* +; CHECK-NEXT: %[[i32_1:.*]] = load i32, i32* %[[bc_src]], align 1, !tbaa [[TAG_57]] +; CHECK-NEXT: %[[i32_1_trunc:.*]] = trunc i32 %[[i32_1]] to i24 +; CHECK-NEXT: %[[i32_1_trunc_trunc:.*]] = trunc i24 %[[i32_1_trunc]] to i16 +; CHECK-NEXT: %[[i32_1_trunc_trunc_trunc:.*]] = trunc i16 %[[i32_1_trunc_trunc]] to i8 +; CHECK-NEXT: %[[i32_1_trunc_trunc_lshr:.*]] = lshr i16 %[[i32_1_trunc_trunc]], 8 +; CHECK-NEXT: %[[i32_1_trunc_trunc_lshr_trunc:.*]] = trunc i16 %[[i32_1_trunc_trunc_lshr]] to i8 +; CHECK-NEXT: %[[i32_1_trunc_lshr:.*]] = lshr i24 %[[i32_1_trunc]], 16 +; CHECK-NEXT: %[[i32_1_trunc_lshr_trunc:.*]] = trunc i24 %[[i32_1_trunc_lshr]] to i8 +; CHECK-NEXT: %[[i32_1_lshr:.*]] = lshr i32 %[[i32_1]], 24 +; CHECK-NEXT: %[[i32_1_lshr_trunc:.*]] = trunc i32 %[[i32_1_lshr]] to i8 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 5 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 3, {{.*}}), !tbaa [[TAG_57]] @@ -342,19 +369,138 @@ ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_59]] ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 142 -; 
CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_59]] +; CHECK-NEXT: store i8 1, i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 143 +; CHECK-NEXT: store i8 2, i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 144 +; CHECK-NEXT: store i8 3, i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 145 +; CHECK-NEXT: store i8 4, i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 146 +; CHECK-NEXT: store i8 5, i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 147 +; CHECK-NEXT: store i8 6, i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 148 +; CHECK-NEXT: store i8 7, i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 149 +; CHECK-NEXT: store i8 8, i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 150 +; CHECK-NEXT: %[[bc_dst:.*]] = bitcast i8* %[[gep_dst]] to i64* +; CHECK-NEXT: %[[zero:.*]] = zext i8 0 to i16 +; CHECK-NEXT: %[[zero_shl:.*]] = shl i16 %[[zero]], 8 +; CHECK-NEXT: %[[undef16:.*]] = and i16 undef, 255 +; CHECK-NEXT: %[[low16:.*]] = or i16 %[[undef16]], %[[zero_shl]] +; CHECK-NEXT: %[[nine:.*]] = zext i8 9 to i16 +; CHECK-NEXT: %[[low16_and:.*]] = and i16 %[[low16]], -256 +; CHECK-NEXT: %[[low16:.*]] = or i16 %[[low16_and]], %[[nine]] +; CHECK-NEXT: %[[zero:.*]] = zext i8 0 to i24 +; CHECK-NEXT: %[[zero_shl:.*]] = shl i24 %[[zero]], 16 +; 
CHECK-NEXT: %[[undef24:.*]] = and i24 undef, 65535 +; CHECK-NEXT: %[[high24:.*]] = or i24 %[[undef24]], %[[zero_shl]] +; CHECK-NEXT: %[[low16_zext:.*]] = zext i16 %[[low16]] to i24 +; CHECK-NEXT: %[[high24_and:.*]] = and i24 %[[high24]], -65536 +; CHECK-NEXT: %[[value24:.*]] = or i24 %[[high24_and]], %[[low16_zext]] +; CHECK-NEXT: %[[zero:.*]] = zext i8 0 to i32 +; CHECK-NEXT: %[[zero_shl:.*]] = shl i32 %[[zero]], 24 +; CHECK-NEXT: %[[undef32:.*]] = and i32 undef, 16777215 +; CHECK-NEXT: %[[value32:.*]] = or i32 %[[undef32]], %[[zero_shl]] +; CHECK-NEXT: %[[value24_zext:.*]] = zext i24 %[[value24]] to i32 +; CHECK-NEXT: %[[value32_and:.*]] = and i32 %[[value32]], -16777216 +; CHECK-NEXT: %[[value32:.*]] = or i32 %[[value32_and]], %[[value24_zext]] +; CHECK-NEXT: %[[zero:.*]] = zext i8 0 to i40 +; CHECK-NEXT: %[[zero_shl:.*]] = shl i40 %[[zero]], 32 +; CHECK-NEXT: %[[undef40:.*]] = and i40 undef, 4294967295 +; CHECK-NEXT: %[[value40:.*]] = or i40 %[[undef40]], %[[zero_shl]] +; CHECK-NEXT: %[[value32_zext:.*]] = zext i32 %[[value32]] to i40 +; CHECK-NEXT: %[[value40_and:.*]] = and i40 %[[value40]], -4294967296 +; CHECK-NEXT: %[[value40:.*]] = or i40 %[[value40_and]], %[[value32_zext]] +; CHECK-NEXT: %[[zero:.*]] = zext i8 0 to i48 +; CHECK-NEXT: %[[zero_shl:.*]] = shl i48 %[[zero]], 40 +; CHECK-NEXT: %[[undef48:.*]] = and i48 undef, 1099511627775 +; CHECK-NEXT: %[[value48:.*]] = or i48 %[[undef48]], %[[zero_shl]] +; CHECK-NEXT: %[[value40_zext:.*]] = zext i40 %[[value40]] to i48 +; CHECK-NEXT: %[[value48_and:.*]] = and i48 %[[value48]], -1099511627776 +; CHECK-NEXT: %[[value48:.*]] = or i48 %[[value48_and]], %[[value40_zext]] +; CHECK-NEXT: %[[zero:.*]] = zext i8 0 to i56 +; CHECK-NEXT: %[[zero_shl:.*]] = shl i56 %[[zero]], 48 +; CHECK-NEXT: %[[undef56:.*]] = and i56 undef, 281474976710655 +; CHECK-NEXT: %[[value56:.*]] = or i56 %[[undef56]], %[[zero_shl]] +; CHECK-NEXT: %[[value48_zext:.*]] = zext i48 %[[value48]] to i56 +; CHECK-NEXT: %[[value56_and:.*]] = and i56 
%[[value56]], -281474976710656 +; CHECK-NEXT: %[[value56:.*]] = or i56 %[[value56_and]], %[[value48_zext]] +; CHECK-NEXT: %[[zero:.*]] = zext i8 0 to i64 +; CHECK-NEXT: %[[zero_shl:.*]] = shl i64 %[[zero]], 56 +; CHECK-NEXT: %[[undef64:.*]] = and i64 undef, 72057594037927935 +; CHECK-NEXT: %[[value64:.*]] = or i64 %[[undef64]], %[[zero_shl]] +; CHECK-NEXT: %[[value56_zext:.*]] = zext i56 %[[value56]] to i64 +; CHECK-NEXT: %[[value64_and:.*]] = and i64 %[[value64]], -72057594037927936 +; CHECK-NEXT: %[[value64:.*]] = or i64 %[[value64_and]], %[[value56_zext]] +; CHECK-NEXT: store i64 %[[value64]], i64* %[[bc_dst]], align 1, !tbaa [[TAG_59]] ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 158 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_59]] ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 200 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_59]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 207 -; CHECK-NEXT: store i8 42, i8* %[[gep]], {{.*}}, !tbaa [[TAG_59]] +; CHECK-NEXT: store i8 %[[i8_3]], i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 201 +; CHECK-NEXT: store i8 %[[i8_4]], i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 202 +; CHECK-NEXT: store i8 42, i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 203 +; CHECK-NEXT: %[[bc_dst:.*]] = bitcast i8* %[[gep_dst]] to i32* +; CHECK-NEXT: %[[byte:.*]] = zext i8 42 to 
i16 +; CHECK-NEXT: %[[byte_shl:.*]] = shl i16 %[[byte]], 8 +; CHECK-NEXT: %[[undef16:.*]] = and i16 undef, 255 +; CHECK-NEXT: %[[low16:.*]] = or i16 %[[undef16]], %[[byte_shl]] +; CHECK-NEXT: %[[byte:.*]] = zext i8 42 to i16 +; CHECK-NEXT: %[[low16_and:.*]] = and i16 %[[low16]], -256 +; CHECK-NEXT: %[[low16:.*]] = or i16 %[[low16_and]], %[[byte]] +; CHECK-NEXT: %[[byte:.*]] = zext i8 42 to i24 +; CHECK-NEXT: %[[byte_shl:.*]] = shl i24 %[[byte]], 16 +; CHECK-NEXT: %[[undef24:.*]] = and i24 undef, 65535 +; CHECK-NEXT: %[[high24:.*]] = or i24 %[[undef24]], %[[byte_shl]] +; CHECK-NEXT: %[[low16_zext:.*]] = zext i16 %[[low16]] to i24 +; CHECK-NEXT: %[[high24_and:.*]] = and i24 %[[high24]], -65536 +; CHECK-NEXT: %[[value24:.*]] = or i24 %[[high24_and]], %[[low16_zext]] +; CHECK-NEXT: %[[byte:.*]] = zext i8 42 to i32 +; CHECK-NEXT: %[[byte_shl:.*]] = shl i32 %[[byte]], 24 +; CHECK-NEXT: %[[undef32:.*]] = and i32 undef, 16777215 +; CHECK-NEXT: %[[value32:.*]] = or i32 %[[undef32]], %[[byte_shl]] +; CHECK-NEXT: %[[value24_zext:.*]] = zext i24 %[[value24]] to i32 +; CHECK-NEXT: %[[value32_and:.*]] = and i32 %[[value32]], -16777216 +; CHECK-NEXT: %[[value32:.*]] = or i32 %[[value32_and]], %[[value24_zext]] +; CHECK-NEXT: store i32 %[[value32]], i32* %[[bc_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 207 +; CHECK-NEXT: store i8 42, i8* %[[gep_dst]], {{.*}}, !tbaa [[TAG_59]] ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 208 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_59]] +; CHECK-NEXT: store i8 42, i8* %[[gep_dst]], {{.*}}, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 209 +; CHECK-NEXT: store i8 %[[i8_0]], i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; 
CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 210 +; CHECK-NEXT: store i8 %[[i8_0_210]], i8* %[[gep_dst]], align 1, !tbaa [[TAG_59]] +; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 211 +; CHECK-NEXT: %[[bc_dst:.*]] = bitcast i8* %[[gep_dst]] to i32* +; CHECK-NEXT: %[[i32_1_trunc_trunc_lshr_trunc_zext:.*]] = zext i8 %[[i32_1_trunc_trunc_lshr_trunc]] to i16 +; CHECK-NEXT: %[[i32_1_trunc_trunc_lshr_trunc_zext_shl:.*]] = shl i16 %[[i32_1_trunc_trunc_lshr_trunc_zext]], 8 +; CHECK-NEXT: %[[undef16:.*]] = and i16 undef, 255 +; CHECK-NEXT: %[[second_byte:.*]] = or i16 %[[undef16]], %[[i32_1_trunc_trunc_lshr_trunc_zext_shl]] +; CHECK-NEXT: %[[i32_1_trunc_trunc_trunc_zext:.*]] = zext i8 %[[i32_1_trunc_trunc_trunc]] to i16 +; CHECK-NEXT: %[[masked_second_byte:.*]] = and i16 %[[second_byte]], -256 +; CHECK-NEXT: %[[low16:.*]] = or i16 %[[masked_second_byte]], %[[i32_1_trunc_trunc_trunc_zext]] +; CHECK-NEXT: %[[i32_1_trunc_lshr_trunc_zext:.*]] = zext i8 %[[i32_1_trunc_lshr_trunc]] to i24 +; CHECK-NEXT: %[[i32_1_trunc_lshr_trunc_zext_shl:.*]] = shl i24 %[[i32_1_trunc_lshr_trunc_zext]], 16 +; CHECK-NEXT: %[[undef24:.*]] = and i24 undef, 65535 +; CHECK-NEXT: %[[third_byte:.*]] = or i24 %[[undef24]], %[[i32_1_trunc_lshr_trunc_zext_shl]] +; CHECK-NEXT: %[[low16_zext:.*]] = zext i16 %[[low16]] to i24 +; CHECK-NEXT: %[[masked_third_byte:.*]] = and i24 %[[third_byte]], -65536 +; CHECK-NEXT: %[[value24:.*]] = or i24 %[[masked_third_byte]], %[[low16_zext]] +; CHECK-NEXT: %[[i32_1_lshr_trunc_zext:.*]] = zext i8 %[[i32_1_lshr_trunc]] to i32 +; CHECK-NEXT: %[[high_byte:.*]] = shl i32 %[[i32_1_lshr_trunc_zext]], 24 +; CHECK-NEXT: %[[undef32:.*]] = and i32 undef, 16777215 +; CHECK-NEXT: %[[value32:.*]] = or i32 %[[undef32]], %[[high_byte]] +; CHECK-NEXT: %[[value24_zext:.*]] = zext i24 %[[value24]] to i32 +; CHECK-NEXT: %[[value32_and:.*]] = and i32 %[[value32]], -16777216 +; CHECK-NEXT: %[[value32:.*]] = or i32 %[[value32_and]], 
%[[value24_zext]] +; CHECK-NEXT: store i32 %[[value32]], i32* %[[bc_dst]], align 1, !tbaa [[TAG_59]] ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 215 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}), !tbaa [[TAG_59]] @@ -415,7 +561,7 @@ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i1 false), !tbaa !3 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_3]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_3:!.*]] ; Clobber a single element of the array, this should be promotable, and be deleted. 
%c = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 42 @@ -425,11 +571,11 @@ call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i1 false), !tbaa !5 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_5]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_5:!.*]] call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i1 false), !tbaa !7 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 20, {{.*}}), !tbaa [[TAG_7]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 20, {{.*}}), !tbaa [[TAG_7:!.*]] ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 20 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* ; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_7]] @@ -862,12 +1008,12 @@ ; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[agep2]], i8 42, i32 %size, {{.*}}), !tbaa [[TAG_5]] ; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32* -; CHECK-NEXT: store i32 42, i32* %[[dstcast1]], {{.*}}, !tbaa [[TAG_9]] +; CHECK-NEXT: store i32 42, i32* %[[dstcast1]], {{.*}}, !tbaa [[TAG_9:!.*]] ; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8, i8* %dst, i64 4 ; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32* ; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]], {{.*}}, !tbaa [[TAG_9]] ; CHECK-NEXT: 
%[[agep3:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* align 1 %[[agep3]], i32 %size, {{.*}}), !tbaa [[TAG_11]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* align 1 %[[agep3]], i32 %size, {{.*}}), !tbaa [[TAG_11:!.*]] ; CHECK-NEXT: ret void entry: @@ -1970,48 +2116,8 @@ ; CHECK-DAG: [[TAG_9]] = !{[[TYPE_9]], [[TYPE_9]], i64 0, i64 1} ; CHECK-DAG: [[TYPE_11:!.*]] = !{{{.*}}, !"type_11"} ; CHECK-DAG: [[TAG_11]] = !{[[TYPE_11]], [[TYPE_11]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_13:!.*]] = !{{{.*}}, !"type_13"} -; CHECK-DAG: [[TAG_13]] = !{[[TYPE_13]], [[TYPE_13]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_15:!.*]] = !{{{.*}}, !"type_15"} -; CHECK-DAG: [[TAG_15]] = !{[[TYPE_15]], [[TYPE_15]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_17:!.*]] = !{{{.*}}, !"type_17"} -; CHECK-DAG: [[TAG_17]] = !{[[TYPE_17]], [[TYPE_17]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_19:!.*]] = !{{{.*}}, !"type_19"} -; CHECK-DAG: [[TAG_19]] = !{[[TYPE_19]], [[TYPE_19]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_21:!.*]] = !{{{.*}}, !"type_21"} -; CHECK-DAG: [[TAG_21]] = !{[[TYPE_21]], [[TYPE_21]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_23:!.*]] = !{{{.*}}, !"type_23"} -; CHECK-DAG: [[TAG_23]] = !{[[TYPE_23]], [[TYPE_23]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_25:!.*]] = !{{{.*}}, !"type_25"} -; CHECK-DAG: [[TAG_25]] = !{[[TYPE_25]], [[TYPE_25]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_27:!.*]] = !{{{.*}}, !"type_27"} -; CHECK-DAG: [[TAG_27]] = !{[[TYPE_27]], [[TYPE_27]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_29:!.*]] = !{{{.*}}, !"type_29"} -; CHECK-DAG: [[TAG_29]] = !{[[TYPE_29]], [[TYPE_29]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_31:!.*]] = !{{{.*}}, !"type_31"} -; CHECK-DAG: [[TAG_31]] = !{[[TYPE_31]], [[TYPE_31]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_33:!.*]] = !{{{.*}}, !"type_33"} -; CHECK-DAG: [[TAG_33]] = !{[[TYPE_33]], [[TYPE_33]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_35:!.*]] = !{{{.*}}, !"type_35"} -; CHECK-DAG: 
[[TAG_35]] = !{[[TYPE_35]], [[TYPE_35]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_37:!.*]] = !{{{.*}}, !"type_37"} -; CHECK-DAG: [[TAG_37]] = !{[[TYPE_37]], [[TYPE_37]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_39:!.*]] = !{{{.*}}, !"type_39"} -; CHECK-DAG: [[TAG_39]] = !{[[TYPE_39]], [[TYPE_39]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_41:!.*]] = !{{{.*}}, !"type_41"} -; CHECK-DAG: [[TAG_41]] = !{[[TYPE_41]], [[TYPE_41]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_43:!.*]] = !{{{.*}}, !"type_43"} -; CHECK-DAG: [[TAG_43]] = !{[[TYPE_43]], [[TYPE_43]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_45:!.*]] = !{{{.*}}, !"type_45"} -; CHECK-DAG: [[TAG_45]] = !{[[TYPE_45]], [[TYPE_45]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_47:!.*]] = !{{{.*}}, !"type_47"} -; CHECK-DAG: [[TAG_47]] = !{[[TYPE_47]], [[TYPE_47]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_49:!.*]] = !{{{.*}}, !"type_49"} -; CHECK-DAG: [[TAG_49]] = !{[[TYPE_49]], [[TYPE_49]], i64 0, i64 1} ; CHECK-DAG: [[TYPE_51:!.*]] = !{{{.*}}, !"type_51"} ; CHECK-DAG: [[TAG_51]] = !{[[TYPE_51]], [[TYPE_51]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_53:!.*]] = !{{{.*}}, !"type_53"} -; CHECK-DAG: [[TAG_53]] = !{[[TYPE_53]], [[TYPE_53]], i64 0, i64 1} ; CHECK-DAG: [[TYPE_55:!.*]] = !{{{.*}}, !"type_55"} ; CHECK-DAG: [[TAG_55]] = !{[[TYPE_55]], [[TYPE_55]], i64 0, i64 1} ; CHECK-DAG: [[TYPE_57:!.*]] = !{{{.*}}, !"type_57"} Index: llvm/test/Transforms/SROA/split-integer-be.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/split-integer-be.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -sroa -S | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; CHECK-LABEL: @split_be +; CHECK-NOT: alloca +; CHECK: %[[x_lshr:.*]] = lshr i32 %X, 24 +; CHECK: %[[x_part:.*]] = trunc i32 %[[x_lshr]] to i8 +; CHECK: %[[y_lshr:.*]] = lshr i32 %Y, 8 +; CHECK: %[[y_part:.*]] = trunc i32 %[[y_lshr]] to i24 +; CHECK: %[[x_zext:.*]] = zext 
i8 %[[x_part]] to i32 +; CHECK-NEXT: %[[y_zext:.*]] = zext i24 %[[y_part]] to i32 +; CHECK-NEXT: %[[x_shl:.*]] = shl i32 %[[x_zext]], 24 +; CHECK-NEXT: %[[result:.*]] = or i32 %[[x_shl]], %[[y_zext]] +; CHECK-NEXT: ret i32 %[[result]] + +define i32 @split_be(i8* %dst, i32 %X, i32 %Y) { + %A = alloca [8 x i8] + %gep1 = getelementptr [8 x i8], [8 x i8]* %A, i16 0, i16 0 + %ptr1 = bitcast i8* %gep1 to i32* + %gep2 = getelementptr [8 x i8], [8 x i8]* %A, i16 0, i16 1 + %ptr2 = bitcast i8* %gep2 to i32* + store i32 %X, i32* %ptr1, align 4 + store i32 %Y, i32* %ptr2, align 1 + %res = load i32, i32* %ptr1, align 4 + ret i32 %res +} Index: llvm/test/Transforms/SROA/split-integer.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/split-integer.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -sroa -S | FileCheck %s + + +%inner = type { i32, i32 } +%outer = type { i8, %inner } + +; CHECK-LABEL: @foo +; CHECK-NOT: alloca +; CHECK-NOT: store +; CHECK-NOT: load + +define i64 @foo() { +entry: + %tmpstruct1 = alloca %outer, align 8 + %tmpstruct2 = alloca %outer, align 8 + %ptr1 = getelementptr inbounds %outer, %outer* %tmpstruct2, i64 0, i32 0 + store i8 0, i8* %ptr1, align 8 + %innerptr = getelementptr inbounds %outer, %outer* %tmpstruct2, i64 0, i32 1 + %ptr2 = bitcast %inner* %innerptr to i64* + store i64 4, i64* %ptr2, align 4 + %altptr = bitcast %outer* %tmpstruct2 to i64* + %split = load i64, i64* %altptr, align 8 + %construct1 = insertvalue { i64, i32 } undef, i64 %split, 0 + %construct2 = insertvalue { i64, i32 } %construct1, i32 0, 1 + %first64 = extractvalue { i64, i32 } %construct2, 0 + %last32 = extractvalue { i64, i32 } %construct2, 1 + %tmpptr = bitcast %outer* %tmpstruct1 to i64* + store i64 %first64, i64* %tmpptr + %lastptr = getelementptr inbounds %outer, %outer* %tmpstruct1, i64 0, i32 1, i32 1 + store i32 %last32, i32* %lastptr, align 8 + %flagptr = getelementptr inbounds %outer, %outer* %tmpstruct1, i64 0, 
i32 0 + %flag = load i8, i8* %flagptr, align 8 + %structptr = getelementptr inbounds %outer, %outer* %tmpstruct1, i64 0, i32 1 + %valptr = bitcast %inner* %structptr to i64* + %value = load i64, i64* %valptr, align 4 + %cond = icmp eq i8 %flag, 0 + br i1 %cond, label %true, label %exit + +exit: + %retv = phi i64 [ 4, %true ], [ %value, %entry ] + ret i64 %retv + +true: + br label %exit +}