Index: lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp =================================================================== --- lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -117,19 +117,18 @@ /// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15). class ConstantOffsetExtractor { public: - /// Extracts a constant offset from the given GEP index. It outputs the - /// numeric value of the extracted constant offset (0 if failed), and a + /// Extracts a constant offset from the given GEP index. It returns the /// new index representing the remainder (equal to the original index minus /// the constant offset). /// \p Idx The given GEP index - /// \p NewIdx The new index to replace (output) /// \p DL The datalayout of the module /// \p GEP The given GEP - static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL, - GetElementPtrInst *GEP); + static Value *Extract(Value *Idx, const DataLayout *DL, + GetElementPtrInst *GEP); /// Looks for a constant offset without extracting it. The meaning of the /// arguments and the return value are the same as Extract. - static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP); + static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP, + bool &FoundConst); private: ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt) @@ -148,10 +147,12 @@ /// an index of an inbounds GEP is guaranteed to be /// non-negative. Levaraging this, we can better split /// inbounds GEPs. - APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative); + /// \p FoundConst Whether V constains constants. + APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative, + bool &FoundConst); /// A helper function to look into both operands of a binary operator. - APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended, - bool ZeroExtended); + APInt findInOperands(BinaryOperator *BO, bool SignExtended, bool ZeroExtended, + bool &FoundConst); /// After finding the constant offset C from the GEP index I, we build a new /// index I' s.t. I' + C = I. This function builds and returns the new /// index I' according to UserChain produced by function "find". @@ -182,10 +183,10 @@ /// /// \p ChainIndex The index to UserChain. ChainIndex is initially /// UserChain.size() - 1, and is decremented during - /// the recursion. - Value *distributeExtsAndCloneChain(unsigned ChainIndex); + /// the recursion. The next index to visit is ChainIndex - 1. + Value *distributeExtsAndCloneChain(unsigned &ChainIndex); /// Reassociates the GEP index to the form I' + C and returns I'. - Value *removeConstOffset(unsigned ChainIndex); + Value *removeConstOffset(unsigned &ChainIndex); /// A helper function to apply ExtInsts, a list of s/zext, to value V. /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function /// returns "sext i32 (zext i16 V to i32) to i64". @@ -207,12 +208,24 @@ bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO, bool NonNegative); - /// The path from the constant offset to the old GEP index. e.g., if the GEP - /// index is "a * b + (c + 5)". After running function find, UserChain[0] will - /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and - /// UserChain[2] will be the entire expression "a * b + (c + 5)". - /// - /// This path helps to rebuild the new GEP index. + /// UserChain records the visit path from constant offsets to the old GEP + /// index. e.g. if the GEP index is calculated as following: + /// b = a + 3 + /// c = b + 4; + /// e = d + 8; + /// f = c + e; + /// And such index is actually a tree: + /// f = c + e + /// / \ + /// c = b + 4 e = d + 8 + /// / \ \ + /// b = a + 3 4 8 + /// / + /// 3 + /// The traversal is DFS(Depth-First Search) and stored in post-order, so such + /// nodes are kept in UserChain as following: + /// 3, b = a + 3, 4, c = b + 4, 8, e = d + 8, f = c + e; + /// This sequence is important when we try to remove constants. SmallVector UserChain; /// A data structure used in rebuildWithoutConstOffset. Contains all /// sext/zext instructions along UserChain. @@ -355,30 +368,23 @@ return true; } -APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO, - bool SignExtended, - bool ZeroExtended) { +APInt ConstantOffsetExtractor::findInOperands(BinaryOperator *BO, + bool SignExtended, + bool ZeroExtended, + bool &FoundConst) { // BO being non-negative does not shed light on whether its operands are // non-negative. Clear the NonNegative flag here. - APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended, - /* NonNegative */ false); - // If we found a constant offset in the left operand, stop and return that. - // This shortcut might cause us to miss opportunities of combining the - // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9. - // However, such cases are probably already handled by -instcombine, - // given this pass runs after the standard optimizations. - if (ConstantOffset != 0) return ConstantOffset; - ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended, - /* NonNegative */ false); - // If U is a sub operator, negate the constant offset found in the right - // operand. - if (BO->getOpcode() == Instruction::Sub) - ConstantOffset = -ConstantOffset; - return ConstantOffset; + APInt Constant0 = find(BO->getOperand(0), SignExtended, ZeroExtended, + /* NonNegative */ false, FoundConst); + APInt Constant1 = find(BO->getOperand(1), SignExtended, ZeroExtended, + /* NonNegative */ false, FoundConst); + bool IsSub = (BO->getOpcode() == Instruction::Sub); + return IsSub ? Constant0 - Constant1 : Constant0 + Constant1; } APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended, - bool ZeroExtended, bool NonNegative) { + bool ZeroExtended, bool NonNegative, + bool &FoundConst) { // TODO(jingyue): We could trace into integer/pointer casts, such as // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only // integers because it gives good enough results for our benchmarks. @@ -389,32 +395,38 @@ if (U == nullptr) return APInt(BitWidth, 0); APInt ConstantOffset(BitWidth, 0); + bool FindInOperands = false; if (ConstantInt *CI = dyn_cast(V)) { // Hooray, we found it! ConstantOffset = CI->getValue(); + // Zero is a valid constant offset, but doesn't help this optimization. + FindInOperands = (ConstantOffset != 0); } else if (BinaryOperator *BO = dyn_cast(V)) { // Trace into subexpressions for more hoisting opportunities. if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) { - ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended); + ConstantOffset = + findInOperands(BO, SignExtended, ZeroExtended, FindInOperands); } } else if (isa(V)) { ConstantOffset = find(U->getOperand(0), /* SignExtended */ true, - ZeroExtended, NonNegative).sext(BitWidth); + ZeroExtended, NonNegative, + FindInOperands).sext(BitWidth); } else if (isa(V)) { // As an optimization, we can clear the SignExtended flag because // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll. // // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0. - ConstantOffset = - find(U->getOperand(0), /* SignExtended */ false, - /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth); + ConstantOffset = find(U->getOperand(0), /* SignExtended */ false, + /* ZeroExtended */ true, /* NonNegative */ false, + FindInOperands).zext(BitWidth); } - // If we found a non-zero constant offset, add it to the path for - // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't - // help this optimization. - if (ConstantOffset != 0) + // If we found constant offset in current Value, add it to the path for + // rebuildWithoutConstOffset. + if (FindInOperands) { UserChain.push_back(U); + FoundConst = true; + } return ConstantOffset; } @@ -438,7 +450,11 @@ } Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() { - distributeExtsAndCloneChain(UserChain.size() - 1); + assert(UserChain.size() > 0 && "Empty Chain"); + + unsigned ChainIndex = UserChain.size() - 1; + distributeExtsAndCloneChain(ChainIndex); + assert(ChainIndex == 0 && "Make sure that all Users have been visited"); // Remove all nullptrs (used to be s/zext) from UserChain. unsigned NewSize = 0; for (auto I = UserChain.begin(), E = UserChain.end(); I != E; ++I) { @@ -448,63 +464,70 @@ } } UserChain.resize(NewSize); - return removeConstOffset(UserChain.size() - 1); + ChainIndex = NewSize - 1; + // Remove all the constant offsets and generate a new index. + Value *NewIdx = removeConstOffset(ChainIndex); + assert(ChainIndex == 0 && "Make sure that all Users have been visited"); + return NewIdx; } Value * -ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) { +ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned &ChainIndex) { User *U = UserChain[ChainIndex]; - if (ChainIndex == 0) { - assert(isa(U)); - // If U is a ConstantInt, applyExts will return a ConstantInt as well. + // If U is a ConstantInt, applyExts will return a ConstantInt as well. + if (isa(U)) return UserChain[ChainIndex] = cast(applyExts(U)); - } + assert(ChainIndex != 0 && + "Only Constant is allowed the last one in the Chain"); + // Store current index in case that ChainIndex may be decremented. + unsigned ThisIndex = ChainIndex; if (CastInst *Cast = dyn_cast(U)) { assert((isa(Cast) || isa(Cast)) && "We only traced into two types of CastInst: sext and zext"); ExtInsts.push_back(Cast); UserChain[ChainIndex] = nullptr; - return distributeExtsAndCloneChain(ChainIndex - 1); + Value *NewV = distributeExtsAndCloneChain(--ChainIndex); + // Pop back as this CastInst is only used when visiting it's operand. + ExtInsts.pop_back(); + return NewV; } // Function find only trace into BinaryOperator and CastInst. BinaryOperator *BO = cast(U); - // OpNo = which operand of BO is UserChain[ChainIndex - 1] - unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1); - Value *TheOther = applyExts(BO->getOperand(1 - OpNo)); - Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1); - - BinaryOperator *NewBO = nullptr; - if (OpNo == 0) { - NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther, - BO->getName(), IP); + + Value *NewOp0, *NewOp1; + // At least one of the two operands are the next in Chain. Firstly check if + // the next to be visited is operand 1. + // If true, the next to be visited may be operand 0 (depend on whether + // operand 0 is equal to the next in chain). + // If false, the next to be visited must be operand 0. + if (BO->getOperand(1) == UserChain[ChainIndex - 1]) { + NewOp1 = distributeExtsAndCloneChain(--ChainIndex); + + if (ChainIndex != 0 && BO->getOperand(0) == UserChain[ChainIndex - 1]) + NewOp0 = distributeExtsAndCloneChain(--ChainIndex); + else + NewOp0 = applyExts(BO->getOperand(0)); } else { - NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain, - BO->getName(), IP); + assert(BO->getOperand(0) == UserChain[ChainIndex - 1] && + "At least one operand is next in Chain"); + NewOp1 = applyExts(BO->getOperand(1)); + NewOp0 = distributeExtsAndCloneChain(--ChainIndex); } - return UserChain[ChainIndex] = NewBO; + + BinaryOperator *NewBO = BinaryOperator::Create(BO->getOpcode(), NewOp0, + NewOp1, BO->getName(), IP); + return UserChain[ThisIndex] = NewBO; } -Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) { - if (ChainIndex == 0) { - assert(isa(UserChain[ChainIndex])); +Value *ConstantOffsetExtractor::removeConstOffset(unsigned &ChainIndex) { + if (isa(UserChain[ChainIndex])) return ConstantInt::getNullValue(UserChain[ChainIndex]->getType()); - } + assert(ChainIndex != 0 && + "Only Constant is allowed to be the last one in the Chain"); BinaryOperator *BO = cast(UserChain[ChainIndex]); - unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1); - assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]); - Value *NextInChain = removeConstOffset(ChainIndex - 1); - Value *TheOther = BO->getOperand(1 - OpNo); - - // If NextInChain is 0 and not the LHS of a sub, we can simplify the - // sub-expression to be just TheOther. - if (ConstantInt *CI = dyn_cast(NextInChain)) { - if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0)) - return TheOther; - } - if (BO->getOpcode() == Instruction::Or) { // Rebuild "or" as "add", because "or" may be invalid for the new // epxression. @@ -519,50 +542,75 @@ // // Replacing the "or" with "add" is fine, because // a | (b + 5) = a + (b + 5) = (a + b) + 5 - if (OpNo == 0) { - return BinaryOperator::CreateAdd(NextInChain, TheOther, BO->getName(), - IP); - } else { - return BinaryOperator::CreateAdd(TheOther, NextInChain, BO->getName(), - IP); + BO = BinaryOperator::CreateAdd(BO->getOperand(0), BO->getOperand(1), + BO->getName(), BO); + } + + // Similar to distributeExtsAndCloneChain. Firstly check if the next to be + // visited is operand 1. + // If true, the next to be visited may be operand 0 (depend on whether + // operand 0 is equal to the next in chain). + // If false, the next to be visited must be operand 0. + Value *NewOp0 = BO->getOperand(0), *NewOp1 = BO->getOperand(1); + if (BO->getOperand(1) == UserChain[ChainIndex - 1]) { + NewOp1 = removeConstOffset(--ChainIndex); + BO->setOperand(1, NewOp1); + + if (ChainIndex != 0 && BO->getOperand(0) == UserChain[ChainIndex - 1]) { + NewOp0 = removeConstOffset(--ChainIndex); + BO->setOperand(0, NewOp0); + } + } else { + assert(BO->getOperand(0) == UserChain[ChainIndex - 1] && + "At least one operand is next in Chain"); + NewOp0 = removeConstOffset(--ChainIndex); + BO->setOperand(0, NewOp0); + } + + // The new operand can be removed if it is constant. + bool RemoveOp0 = false, RemoveOp1 = false; + if (ConstantInt *CI = dyn_cast(NewOp1)) { + assert(CI->isZero() && "Make sure that all constants have been removed"); + RemoveOp1 = true; + } + if (ConstantInt *CI = dyn_cast(NewOp0)) { + assert(CI->isZero() && "Make sure that all constants have been removed"); + // Can not remove constant 0 from (0 - b) + if (BO->getOpcode() != Instruction::Sub || RemoveOp1) { + RemoveOp0 = true; } } - // We can reuse BO in this case, because the new expression shares the same - // instruction type and BO is used at most once. - assert(BO->getNumUses() <= 1 && - "distributeExtsAndCloneChain clones each BinaryOperator in " - "UserChain, so no one should be used more than " - "once"); - BO->setOperand(OpNo, NextInChain); + if (RemoveOp0 && RemoveOp1) + return ConstantInt::getNullValue(UserChain[ChainIndex]->getType()); + if (RemoveOp0 || RemoveOp1) + return RemoveOp0 ? NewOp1 : NewOp0; + BO->setHasNoSignedWrap(false); BO->setHasNoUnsignedWrap(false); - // Make sure it appears after all instructions we've inserted so far. - BO->moveBefore(IP); return BO; } -int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx, - const DataLayout *DL, - GetElementPtrInst *GEP) { +Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL, + GetElementPtrInst *GEP) { ConstantOffsetExtractor Extractor(DL, GEP); - // Find a non-zero constant offset first. - APInt ConstantOffset = - Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, - GEP->isInBounds()); - if (ConstantOffset != 0) { + // Try to find constant offsets. + bool FoundConst = false; + Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, + GEP->isInBounds(), FoundConst); + if (FoundConst) // Separates the constant offset from the GEP index. - NewIdx = Extractor.rebuildWithoutConstOffset(); - } - return ConstantOffset.getSExtValue(); + return Extractor.rebuildWithoutConstOffset(); + return nullptr; } int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL, - GetElementPtrInst *GEP) { + GetElementPtrInst *GEP, + bool &FoundConst) { // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative. return ConstantOffsetExtractor(DL, GEP) .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, - GEP->isInBounds()) + GEP->isInBounds(), FoundConst) .getSExtValue(); } @@ -610,9 +658,13 @@ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { if (isa(*GTI)) { // Tries to extract a constant offset from this GEP index. - int64_t ConstantOffset = - ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP); - if (ConstantOffset != 0) { + bool FoundConst = false; + int64_t ConstantOffset = ConstantOffsetExtractor::Find( + GEP->getOperand(I), DL, GEP, FoundConst); + // Use FoundConst to check whether we need extraction. We don't check + // whether ConstantOffset is zero, as it can not cover some situations + // like (a - 4) + 4. + if (FoundConst) { NeedsExtraction = true; // A GEP may have multiple indices. We accumulate the extracted // constant offset to a byte offset, and later offset the remainder of @@ -657,15 +709,12 @@ gep_type_iterator GTI = gep_type_begin(*GEP); for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { if (isa(*GTI)) { - Value *NewIdx = nullptr; - // Tries to extract a constant offset from this GEP index. - int64_t ConstantOffset = - ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP); - if (ConstantOffset != 0) { - assert(NewIdx != nullptr && - "ConstantOffset != 0 implies NewIdx is set"); + // Splits this GEP index into a variadic part and a constant offset, and + // uses the variadic part as the new index. + Value *NewIdx = + ConstantOffsetExtractor::Extract(GEP->getOperand(I), DL, GEP); + if (NewIdx != nullptr) GEP->setOperand(I, NewIdx); - } } } // Clear the inbounds attribute because the new index may be off-bound. @@ -688,6 +737,9 @@ // TODO(jingyue): do some range analysis to keep as many inbounds as // possible. GEPs with inbounds are more friendly to alias analysis. GEP->setIsInBounds(false); + // No need to create another GEP as the accumulative byte offset is 0. + if (AccumulativeByteOffset == 0) + return true; // Offsets the base with the accumulative byte offset. // Index: test/Transforms/SeparateConstOffsetFromGEP/AArch64/lit.local.cfg =================================================================== --- /dev/null +++ test/Transforms/SeparateConstOffsetFromGEP/AArch64/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True + Index: test/Transforms/SeparateConstOffsetFromGEP/AArch64/split-gep.ll =================================================================== --- /dev/null +++ test/Transforms/SeparateConstOffsetFromGEP/AArch64/split-gep.ll @@ -0,0 +1,51 @@ +; RUN: opt < %s -separate-const-offset-from-gep -dce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "arm64-linux-gnu" + +; Check that (a - 4) + 4 can be optimized correctly. +define i16* @find_0(i32 %a, i16* %ptr) { + %sub = add nsw i32 %a, -4 + %ext = sext i32 %sub to i64 + %sum = add nsw i64 %ext, 4 + %incdec.ptr = getelementptr i16* %ptr, i64 %sum + ret i16* %incdec.ptr +} +; CHECK-LABEL: @find_0( +; CHECK-NOT: add +; CHECK: [[EXT:%[a-zA-Z0-9]+]] = sext +; CHECK: getelementptr i16* %ptr, i64 [[EXT]] +; CHECK-NEXT: ret + +; Check that ((a - 4) + 4) + 1 can be optimized correctly. +define i16* @find_1(i32 %a, i16* %ptr) { + %sub = add nsw i32 %a, -4 + %ext = sext i32 %sub to i64 + %sum = add nsw i64 %ext, 4 + %add.sum = add i64 %sum, 1 + %incdec.ptr = getelementptr i16* %ptr, i64 %add.sum + ret i16* %incdec.ptr +} +; CHECK-LABEL: @find_1( +; CHECK-NOT: add +; CHECK: [[EXT:%[a-zA-Z0-9]+]] = sext +; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr i16* %ptr, i64 [[EXT]] +; CHECK: getelementptr i16* [[PTR]], i64 1 + +; Check a more complex case: ((a + 4) - 8) + (b + 16) +define i16* @find_other(i32 %a, i32 %b, i16* %ptr) { + %add = add nsw i32 %a, 4 + %addext = sext i32 %add to i64 + %subadd = add nsw i64 %addext, -8 + %add2 = add nsw i32 %b, 16 + %add2ext = sext i32 %add2 to i64 + %addsum = add nsw i64 %subadd, %add2ext + %incdec.ptr = getelementptr i16* %ptr, i64 %addsum + ret i16* %incdec.ptr +} +; CHECK-LABEL: @find_other( +; CHECK: sext i32 +; CHECK: sext i32 +; CHECK: [[SUM:%[a-zA-Z0-9]+]] = add +; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr i16* %ptr, i64 [[SUM]] +; CHECK: getelementptr i16* [[PTR]], i64 12 Index: test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll =================================================================== --- test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -242,14 +242,10 @@ entry: %shl = shl i64 %a, 2 %add = add i64 %shl, 12 - %or = or i64 %add, 1 -; CHECK: [[OR:%or[0-9]*]] = add i64 %shl, 1 - ; ((a << 2) + 12) and 1 have no common bits. Therefore, - ; SeparateConstOffsetFromGEP is able to extract the 12. - ; TODO(jingyue): We could reassociate the expression to combine 12 and 1. + %or = or i64 %add, 1 ; ((a << 2) + 12) and 1 have no common bits. %p = getelementptr float* %ptr, i64 %or -; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr float* %ptr, i64 [[OR]] -; CHECK: getelementptr float* [[PTR]], i64 12 +; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr float* %ptr, i64 %shl +; CHECK: getelementptr float* [[PTR]], i64 13 ret float* %p ; CHECK-NEXT: ret }