Index: lib/Transforms/Scalar/GVN.cpp =================================================================== --- lib/Transforms/Scalar/GVN.cpp +++ lib/Transforms/Scalar/GVN.cpp @@ -696,6 +696,8 @@ // Helper fuctions of redundant load elimination bool processLoad(LoadInst *L); + bool loadGEPSelect(LoadInst *LI, MemDepResult Dep); + bool processNonLocalLoad(LoadInst *L); void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, AvailValInBlkVect &ValuesPerBlock, @@ -1817,6 +1819,127 @@ I->replaceAllUsesWith(Repl); } +// Helper function for loadGEPSelect(). Look for the following pattern: +// +// (load (gep ptr idx0 idx1 idx2 )) +// +// Only match above patterns which satisfy the following additional +// constraints: +// - The inbounds attribute must match the inbounds attribute +// of the base GEP +// - The number of indicies must match. +// - The operands, including the pointer operand, only differ by one operand, +// which is the operand fed by a select. +// +static bool isNearMatchingLoadGEP(Value *Inst, GetElementPtrInst *BaseGEP, + unsigned SIIdx) { + if (LoadInst *LI = dyn_cast(Inst)) { + Value *Ptr = LI->getPointerOperand(); + + if (GetElementPtrInst *GEPI = dyn_cast(Ptr)) { + if (GEPI->isInBounds() != BaseGEP->isInBounds() || + GEPI->getNumIndices() != BaseGEP->getNumIndices()) + return false; + + // Compute the number of matching indices. + unsigned NumMatch = 0; + unsigned NumOps = GEPI->getNumOperands(); + for (unsigned i = 0; i != NumOps; ++i, ++NumMatch) + if (GEPI->getOperand(i) != BaseGEP->getOperand(i)) + if (i != SIIdx) + break; + + return NumMatch == NumOps; + } + } + return false; +} + +// Do the following transformation: +// +// a = (load (gep ptr_op idx idx_a)) +// b = (load (gep ptr_op idx idx_b)) +// c = (load (gep ptr_op idx (select (cond, idx_a, idx_b)))) +// --> +// a = (load (gep ptr_op idx idx_a)) +// b = (load (gep ptr_op idx idx_b)) +// c = (select (cond, a, b)) +// +bool GVN::loadGEPSelect(LoadInst *LI, MemDepResult Dep) { + Value *Op = LI->getOperand(0); + + GetElementPtrInst *GEPI = dyn_cast(Op); + if (!GEPI) + return false; + + // Check whether the GEP is using the result of a select. + unsigned SIIdx = 0; + SelectInst *SI = nullptr; + for (unsigned i = 0, e = GEPI->getNumOperands(); i != e; ++i) { + Value *Op0 = GEPI->getOperand(i); + if (ZExtInst *Ext = dyn_cast(Op0)) + Op0 = Ext->getOperand(0); + if (isa(Op0)) { + SIIdx = i; + SI = cast(Op0); + break; + } + } + if (!SI) + return false; + + Value *SelectTrueOp = SI->getTrueValue(); + Value *SelectFalseOp = SI->getFalseValue(); + + // Scan from the load until we encounter a dependent instruction or the + // beginning of the block. + Instruction *DepInst = Dep.getInst(); + BasicBlock::iterator ScanFrom = LI, ScanEnd; + if (DepInst) + ScanEnd = DepInst; + else + ScanEnd = LI->getParent()->begin(); + + Value *TrueLoad = nullptr; + Value *FalseLoad = nullptr; + + // Scan the basic block backwards looking for loads with nearly matching geps. + while (ScanFrom != ScanEnd) { + Instruction *Inst = --ScanFrom; + + if (!isNearMatchingLoadGEP(Inst, GEPI, SIIdx)) + continue; + + Value *Ptr = cast(Inst)->getPointerOperand(); + GetElementPtrInst *GEPI2 = cast(Ptr); + Value *GEPOp = GEPI2->getOperand(SIIdx); + + // Look through zext. + if (ZExtInst *Ext = dyn_cast(GEPOp)) + GEPOp = Ext->getOperand(0); + + // Does the gep op match either of the select ops? + if (GEPOp == SelectTrueOp) + TrueLoad = Inst; + else if (GEPOp == SelectFalseOp) + FalseLoad = Inst; + + // All preconditions fulfilled, apply the transformation: + if (TrueLoad && FalseLoad) { + Value *V = SelectInst::Create(SI->getCondition(), TrueLoad, FalseLoad, + "ld-gep-sel", LI); + LI->replaceAllUsesWith(V); + + if (V->getType()->getScalarType()->isPointerTy()) + MD->invalidateCachedPointerInfo(V); + markInstructionForDeletion(LI); + ++NumGVNLoad; + return true; + } + } + return false; +} + /// Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. bool GVN::processLoad(LoadInst *L) { @@ -1910,6 +2033,13 @@ return false; } + // Attempt to eliminate redundant loads whose addresses are dependent on + // the result of a select instruction. If both values have already been + // loaded into registers the load becomes redundant and can be replaced + // with a select on the two values instead. + if (loadGEPSelect(L, Dep)) + return true; + // If it is defined in another block, try harder. if (Dep.isNonLocal()) return processNonLocalLoad(L); Index: test/Transforms/GVN/load-gep-select.ll =================================================================== --- /dev/null +++ test/Transforms/GVN/load-gep-select.ll @@ -0,0 +1,93 @@ +; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +%struct.s = type { i32 } + +@vec = common global i32* null, align 8 +@agg = common global %struct.s** null, align 8 + +; unsigned *vec; +; +; unsigned test1(unsigned i, unsigned max) { +; if (vec[i] > vec[i+1]) +; ++i; +; if (vec[i] > max) <- Remove redundant loads +; return 1; +; return 0; +; } + +; CHECK-LABEL: test1 +; CHECK: load i32, i32* %arrayidx, align 4 +; CHECK: load i32, i32* %arrayidx2, align 4 +; CHECK-NOT: load +; CHECK: ret i32 + +define i32 @test1(i32 %i, i32 %max) { +entry: + %idxprom = zext i32 %i to i64 + %0 = load i32*, i32** @vec, align 8 + %arrayidx = getelementptr inbounds i32, i32* %0, i64 %idxprom + %1 = load i32, i32* %arrayidx, align 4 + %add = add i32 %i, 1 + %idxprom1 = zext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom1 + %2 = load i32, i32* %arrayidx2, align 4 + %cmp = icmp ugt i32 %1, %2 + %add.i = select i1 %cmp, i32 %add, i32 %i + %idxprom3 = zext i32 %add.i to i64 + %arrayidx4 = getelementptr inbounds i32, i32* %0, i64 %idxprom3 + %3 = load i32, i32* %arrayidx4, align 4 + %cmp5 = icmp ugt i32 %3, %max + %retval.0 = zext i1 %cmp5 to i32 + ret i32 %retval.0 +} + +; struct s { +; unsigned field; +; }; +; struct s **agg; +; +; unsigned test2(unsigned i, unsigned max) { +; if (agg[i]->field > agg[i+1]->field) +; ++i; +; if (agg[i]->field > max) <- Remove redundant loads +; return 1; +; return 0; +; } + +; CHECK-LABEL: test2 +; CHECK: load %struct.s*, %struct.s** %arrayidx, align 8 +; CHECK: load i32, i32* %field, align 4 +; CHECK: load %struct.s*, %struct.s** %arrayidx2, align 8 +; CHECK: load i32, i32* %field3, align 4 +; CHECK-NOT: load +; CHECK-NOT: load +; CHECK: ret i32 + +define i32 @test2(i32 %i, i32 %max) { +entry: + %idxprom = zext i32 %i to i64 + %0 = load %struct.s**, %struct.s*** @agg, align 8 + %arrayidx = getelementptr inbounds %struct.s*, %struct.s** %0, i64 %idxprom + %1 = load %struct.s*, %struct.s** %arrayidx, align 8 + %field = getelementptr inbounds %struct.s, %struct.s* %1, i64 0, i32 0 + %2 = load i32, i32* %field, align 4 + %add = add i32 %i, 1 + %idxprom1 = zext i32 %add to i64 + %arrayidx2 = getelementptr inbounds %struct.s*, %struct.s** %0, i64 %idxprom1 + %3 = load %struct.s*, %struct.s** %arrayidx2, align 8 + %field3 = getelementptr inbounds %struct.s, %struct.s* %3, i64 0, i32 0 + %4 = load i32, i32* %field3, align 4 + %cmp = icmp ugt i32 %2, %4 + %add.i = select i1 %cmp, i32 %add, i32 %i + %idxprom4 = zext i32 %add.i to i64 + %arrayidx5 = getelementptr inbounds %struct.s*, %struct.s** %0, i64 %idxprom4 + %5 = load %struct.s*, %struct.s** %arrayidx5, align 8 + %field6 = getelementptr inbounds %struct.s, %struct.s* %5, i64 0, i32 0 + %6 = load i32, i32* %field6, align 4 + %cmp7 = icmp ugt i32 %6, %max + %retval.0 = zext i1 %cmp7 to i32 + ret i32 %retval.0 +}