Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -215,6 +215,10 @@ "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking.")); +static cl::opt + EnableSplitLargeStruct("cgp-split-large-struct", cl::Hidden, cl::init(true), + cl::desc("Enable splitting large struct")); + namespace { using SetOfInstrs = SmallPtrSet; @@ -260,6 +264,11 @@ /// Keep track of sext chains based on their initial value. DenseMap SeenChainsForSExt; + // Keep track of GEPs accessing the same large structs that are candidates + // to be split later. + DenseMap, 32>> + LargeStructGEPMap; + /// Keep track of SExt promoted. ValueToSExts ValToSExtendedUses; @@ -328,6 +337,7 @@ SmallVectorImpl &SpeculativelyMovedExts); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); + bool splitLargeStruct(); }; } // end anonymous namespace @@ -408,12 +418,23 @@ // to help generate sane code for PHIs involving such edges. EverMadeChange |= SplitIndirectBrCriticalEdges(F); + // Test if the target supports r+i addressing mode. If not, we don't need to + // split large structs. 
+ if (EnableSplitLargeStruct) { + TargetLowering::AddrMode AddrMode; + AddrMode.BaseOffs = 101; + if (!TLI || !TLI->isLegalAddressingMode(*DL, AddrMode, + Type::getInt8Ty(F.getContext()), 0)) + EnableSplitLargeStruct = false; + } + bool MadeChange = true; while (MadeChange) { MadeChange = false; SeenChainsForSExt.clear(); ValToSExtendedUses.clear(); RemovedInsts.clear(); + LargeStructGEPMap.clear(); for (Function::iterator I = F.begin(); I != F.end(); ) { BasicBlock *BB = &*I++; bool ModifiedDTOnIteration = false; @@ -425,6 +446,8 @@ } if (EnableTypePromotionMerge && !ValToSExtendedUses.empty()) MadeChange |= mergeSExts(F); + if (!LargeStructGEPMap.empty()) + MadeChange |= splitLargeStruct(); // Really free removed instructions during promotion. for (Instruction *I : RemovedInsts) @@ -2524,22 +2547,26 @@ /// The ongoing transaction where every action should be registered. TypePromotionTransaction &TPT; + // A GEP which accesses a struct and has too large offset that cannot be + // folded into the addressing mode. + std::pair &LargeStructGEP; + /// This is set to true when we should not do profitability checks. /// When true, IsProfitableToFoldIntoAddressingMode always returns true. bool IgnoreProfitability; AddressingModeMatcher(SmallVectorImpl &AMI, const TargetLowering &TLI, - const TargetRegisterInfo &TRI, - Type *AT, unsigned AS, + const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM, const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, - TypePromotionTransaction &TPT) + TypePromotionTransaction &TPT, + std::pair &LargeStructGEP) : AddrModeInsts(AMI), TLI(TLI), TRI(TRI), DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), - PromotedInsts(PromotedInsts), TPT(TPT) { + PromotedInsts(PromotedInsts), TPT(TPT), LargeStructGEP(LargeStructGEP) { IgnoreProfitability = false; } @@ -2551,20 +2578,19 @@ /// optimizations. 
/// \p PromotedInsts maps the instructions to their type before promotion. /// \p The ongoing transaction where every action should be registered. - static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS, - Instruction *MemoryInst, - SmallVectorImpl &AddrModeInsts, - const TargetLowering &TLI, - const TargetRegisterInfo &TRI, - const SetOfInstrs &InsertedInsts, - InstrToOrigTy &PromotedInsts, - TypePromotionTransaction &TPT) { + static ExtAddrMode + Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst, + SmallVectorImpl &AddrModeInsts, + const TargetLowering &TLI, const TargetRegisterInfo &TRI, + const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, + TypePromotionTransaction &TPT, + std::pair &LargeStructGEP) { ExtAddrMode Result; - bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, - AccessTy, AS, + bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS, MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT).matchAddr(V, 0); + PromotedInsts, TPT, LargeStructGEP) + .matchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); return Result; } @@ -3720,6 +3746,31 @@ // Check to see if we can fold the base pointer in too. if (matchAddr(AddrInst->getOperand(0), Depth+1)) return true; + } else { + // Record a GEP which accesses a struct and has too large offset that + // cannot fit into the r+i addressing mode. The GEP is the + // candidate to be split later. + if (EnableSplitLargeStruct && Depth == 0 && ConstantOffset > 0) { + // Simple and common case that only one instruction which is a GEP is + // used in calculating the address for the struct access. 
+ Value *Base = AddrInst->getOperand(0); + PointerType *BaseTy = cast(Base->getType()); + if (BaseTy->getElementType()->isStructTy()) { + Instruction *BaseI = dyn_cast(Base); + if (isa(Base) || isa(Base) || + (BaseI && !isa(BaseI) && + !isa(BaseI) && !isa(BaseI))) + if (GetElementPtrInst *GEP = + dyn_cast(AddrInst)) + if ((BaseI && GEP->getParent() != BaseI->getParent()) || + (!BaseI && + GEP->getParent() != + &GEP->getParent()->getParent()->getEntryBlock())) + // Make sure the base is not in the same basic block as the + // GEP. Otherwise, the split might be undone. + LargeStructGEP = std::make_pair(GEP, ConstantOffset); + } + } } AddrMode.BaseOffs -= ConstantOffset; return false; @@ -4130,12 +4181,12 @@ // will tell us if the addressing mode for the memory operation will // *actually* cover the shared instruction. ExtAddrMode Result; + std::pair LargeStructGEP(nullptr, 0); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, - AddressAccessTy, AS, - MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT); + AddressingModeMatcher Matcher( + MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result, + InsertedInsts, PromotedInsts, TPT, LargeStructGEP); Matcher.IgnoreProfitability = true; bool Success = Matcher.matchAddr(Address, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -4237,11 +4288,19 @@ // the result may differ depending on what other uses our candidate // addressing instructions might have. 
AddrModeInsts.clear(); + std::pair LargeStructGEP(nullptr, 0); ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, - InsertedInsts, PromotedInsts, TPT); - NewAddrMode.OriginalValue = V; + InsertedInsts, PromotedInsts, TPT, LargeStructGEP); + + if (LargeStructGEP.first && + LargeStructGEP.first->getParent() != MemoryInst->getParent()) + // Only collect GEPs that cannot sink unless the underlying struct is + // split. + LargeStructGEPMap[LargeStructGEP.first->getPointerOperand()].push_back( + LargeStructGEP); + NewAddrMode.OriginalValue = V; if (!AddrModes.addNewAddrMode(NewAddrMode)) break; } @@ -4750,6 +4809,147 @@ return Changed; } +static int +compareGEPOffset(const std::pair *LHS, + const std::pair *RHS) { + if (LHS->first == RHS->first) + return 0; + if (LHS->second != RHS->second) + return LHS->second > RHS->second ? 1 : -1; + return LHS->first > RHS->first ? 1 : -1; +} + +// Splitting large structs so that the GEPs accessing the structs can sink to the +// same block as their users. For example, a large struct starting from %base +// is split into two parts where the second part starts from %new_base. +// +// Before: +// BB0: +// %base = +// +// BB1: +// %gep0 = gep %base, off0 +// %gep1 = gep %base, off1 +// %gep2 = gep %base, off2 +// +// BB2: +// %load1 = load %gep0 +// %load2 = load %gep1 +// %load3 = load %gep2 +// +// After: +// BB0: +// %base = +// %new_base = gep %base, off0 +// +// BB1: +// %new_gep0 = %new_base +// %new_gep1 = gep %new_base, off1 - off0 +// %new_gep2 = gep %new_base, off2 - off0 +// +// BB2: +// %load1 = load i32, i32* %new_gep0 +// %load2 = load i32, i32* %new_gep1 +// %load3 = load i32, i32* %new_gep2 +// +// %gep1 and %gep2 can be sunk to BB2 now after the splitting. 
+bool CodeGenPrepare::splitLargeStruct() { + bool Changed = false; + for (auto &Entry : LargeStructGEPMap) { + SmallVectorImpl> &LargeStructGEPs = + Entry.second; + // Sorting all the GEPs of the same struct based on the offsets. + array_pod_sort(LargeStructGEPs.begin(), LargeStructGEPs.end(), + compareGEPOffset); + LargeStructGEPs.erase( + std::unique(LargeStructGEPs.begin(), LargeStructGEPs.end()), + LargeStructGEPs.end()); + // Skip if all the GEPs have the same offsets. + if (LargeStructGEPs.front().second == LargeStructGEPs.back().second) + continue; + GetElementPtrInst *BaseGEP = LargeStructGEPs.begin()->first; + int64_t BaseOffset = LargeStructGEPs.begin()->second; + Value *NewBaseGEP = nullptr; + for (auto &LargeStructGEP : LargeStructGEPs) { + GetElementPtrInst *GEP = LargeStructGEP.first; + int64_t Offset = LargeStructGEP.second; + if (!GEP) + continue; + if (Offset != BaseOffset) { + TargetLowering::AddrMode AddrMode; + AddrMode.BaseOffs = Offset - BaseOffset; + if (!TLI->isLegalAddressingMode(*DL, AddrMode, + GEP->getResultElementType(), + GEP->getAddressSpace())) { + // We need to create a new base if the offset to the current base is + // too large to fit into the addressing mode. So, a very large struct + // may be split into several parts. + BaseGEP = GEP; + BaseOffset = Offset; + NewBaseGEP = nullptr; + } + } + + // Generate a new GEP to replace the current one. + IRBuilder<> Builder(GEP); + Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + Type *I8PtrTy = + Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace()); + Type *I8Ty = Builder.getInt8Ty(); + + if (!NewBaseGEP) { + // Create a new base if we don't have one yet. Find the insertion + // pointer for the new base first. + BasicBlock::iterator NewBaseInsertPt; + BasicBlock *NewBaseInsertBB; + if (Instruction *BaseI = dyn_cast(Entry.first)) { + // If the base of the struct is an instruction, the new base will be + // inserted close to it. 
+ NewBaseInsertBB = BaseI->getParent(); + if (isa(BaseI)) + NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); + else if (InvokeInst *Invoke = dyn_cast(BaseI)) { + NewBaseInsertBB = + SplitEdge(NewBaseInsertBB, Invoke->getNormalDest()); + NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); + } else + NewBaseInsertPt = std::next(BaseI->getIterator()); + } else { + // If the current base is an argument or global value, the new base + // will be inserted to the entry block. + NewBaseInsertBB = &BaseGEP->getParent()->getParent()->getEntryBlock(); + NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); + } + IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt); + // Create a new base. + Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset); + NewBaseGEP = Entry.first; + if (NewBaseGEP->getType() != I8PtrTy) + NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy); + NewBaseGEP = + NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep"); + } + + Value *NewGEP = NewBaseGEP; + if (Offset == BaseOffset) { + if (GEP->getType() != I8PtrTy) + NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); + } else { + // Calculate the new offset for the new GEP. + Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset); + NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index); + + if (GEP->getType() != I8PtrTy) + NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); + } + GEP->replaceAllUsesWith(NewGEP); + GEP->eraseFromParent(); + Changed = true; + } + } + return Changed; +} + /// Return true, if an ext(load) can be formed from an extension in /// \p MovedExts. 
bool CodeGenPrepare::canFormExtLd( Index: test/Transforms/CodeGenPrepare/AArch64/large-struct.ll =================================================================== --- /dev/null +++ test/Transforms/CodeGenPrepare/AArch64/large-struct.ll @@ -0,0 +1,121 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s + +%struct_type = type { [10000 x i32], i32, i32 } + +define void @test1(%struct_type** %s, i32 %n) { +; CHECK-LABEL: test1 +entry: + %struct = load %struct_type*, %struct_type** %s + br label %while_cond + +while_cond: + %phi = phi i32 [ 0, %entry ], [ %i, %while_body ] +; CHECK: mov w{{[0-9]+}}, #40000 +; CHECK-NOT: mov w{{[0-9]+}}, #40004 + %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1 + %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2 + %cmp = icmp slt i32 %phi, %n + br i1 %cmp, label %while_body, label %while_end + +while_body: +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4] + %i = add i32 %phi, 1 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + br label %while_cond + +while_end: + ret void +} + +define void @test2(%struct_type* %struct, i32 %n) { +; CHECK-LABEL: test2 +entry: + %cmp = icmp eq %struct_type* %struct, null + br i1 %cmp, label %while_end, label %while_cond + +while_cond: + %phi = phi i32 [ 0, %entry ], [ %i, %while_body ] +; CHECK: mov w{{[0-9]+}}, #40000 +; CHECK-NOT: mov w{{[0-9]+}}, #40004 + %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1 + %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2 + %cmp1 = icmp slt i32 %phi, %n + br i1 %cmp1, label %while_body, label %while_end + +while_body: +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4] + %i = add i32 %phi, 1 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + br label %while_cond + +while_end: + ret void +} + +define void @test3(%struct_type* %s1, %struct_type* %s2, i1 %cond, i32 %n) { +; CHECK-LABEL: test3 +entry: + br i1 %cond, label %if_true, label %if_end + 
+if_true: + br label %if_end + +if_end: + %struct = phi %struct_type* [ %s1, %entry ], [ %s2, %if_true ] + %cmp = icmp eq %struct_type* %struct, null + br i1 %cmp, label %while_end, label %while_cond + +while_cond: + %phi = phi i32 [ 0, %if_end ], [ %i, %while_body ] +; CHECK: mov w{{[0-9]+}}, #40000 +; CHECK-NOT: mov w{{[0-9]+}}, #40004 + %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1 + %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2 + %cmp1 = icmp slt i32 %phi, %n + br i1 %cmp1, label %while_body, label %while_end + +while_body: +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4] + %i = add i32 %phi, 1 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + br label %while_cond + +while_end: + ret void +} + +declare %struct_type* @foo() + +define void @test4(i32 %n) personality i32 (...)* @__FrameHandler { +; CHECK-LABEL: test4 +entry: + %struct = invoke %struct_type* @foo() to label %while_cond unwind label %cleanup + +while_cond: + %phi = phi i32 [ 0, %entry ], [ %i, %while_body ] +; CHECK: mov w{{[0-9]+}}, #40000 +; CHECK-NOT: mov w{{[0-9]+}}, #40004 + %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1 + %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2 + %cmp = icmp slt i32 %phi, %n + br i1 %cmp, label %while_body, label %while_end + +while_body: +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4] + %i = add i32 %phi, 1 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + br label %while_cond + +while_end: + ret void + +cleanup: + landingpad { i8*, i32 } cleanup + unreachable +} + +declare i32 @__FrameHandler(...)