Index: lib/Transforms/Scalar/GVN.cpp =================================================================== --- lib/Transforms/Scalar/GVN.cpp +++ lib/Transforms/Scalar/GVN.cpp @@ -694,6 +694,8 @@ // Helper fuctions of redundant load elimination bool processLoad(LoadInst *L); + bool loadGEPSelect(LoadInst *LI, MemDepResult Dep); + bool processNonLocalLoad(LoadInst *L); void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, AvailValInBlkVect &ValuesPerBlock, @@ -1814,6 +1816,129 @@ I->replaceAllUsesWith(Repl); } +// Helper function for loadGEPSelect(). +// +// Determine if two GEPs match based on the following constraints: +// - The inbounds attributes must match. +// - The number of indices must match. +// - The SIIdx operand matches the SelectOp. +// - All other operands, including the pointer operand, match. +// +static bool isMatchingGEP(GetElementPtrInst *BaseGEP, + GetElementPtrInst *OtherGEP, unsigned SIIdx, + Value *SelectOp) { + GetElementPtrInst *GEPI = dyn_cast(OtherGEP); + if (!GEPI) + return false; + + if (GEPI->isInBounds() != BaseGEP->isInBounds() || + GEPI->getNumIndices() != BaseGEP->getNumIndices()) + return false; + + for (unsigned i = 0, e = GEPI->getNumOperands(); i != e; ++i) { + if (i == SIIdx) { + Value *Op = GEPI->getOperand(i); + if (CastInst *Cast = dyn_cast(Op)) + Op = Cast->getOperand(0); + if (Op != SelectOp) + return false; + } else if (GEPI->getOperand(i) != BaseGEP->getOperand(i)) + return false; + } + return true; +} + +// Inspect the select operand's users to find a matching load/store, which would +// cause LI to be redundant. +static Value *matchingLdSt(LoadInst *LI, MemDepResult Dep, unsigned Idx, + Value *SelectOp, DominatorTree *DT) { + for (Value::user_iterator UI = SelectOp->user_begin(), + UE = SelectOp->user_end(); + UI != UE;) { + User *U = *UI++; + + // Look through casts. + if (CastInst *Cast = dyn_cast(U)) + U = Cast->user_back(); + + GetElementPtrInst *SelectGEP = dyn_cast(U); + if (!SelectGEP) + continue; + + // Assume a single use of the GEP. + if (!SelectGEP->hasOneUse()) + continue; + + // GEP user must be a load or a store. + Instruction *User = SelectGEP->user_back(); + if (isa(User) && isa(User)) + continue; + + // Load and GEP must be in the same block, since we only use local + // dependency analysis. + if (User->getParent() != LI->getParent()) + continue; + + if (Instruction *DepInst = Dep.getInst()) + if (DepInst != User && !DT->dominates(DepInst, User)) + continue; + + GetElementPtrInst *GEP = dyn_cast(LI->getOperand(0)); + if (isMatchingGEP(GEP, SelectGEP, Idx, SelectOp)) + return isa(User) ? User : cast(User)->getOperand(0); + } + return nullptr; +} + +// Do the following transformation: +// +// a = (ld|st (gep ptr_op idx idx_a)) +// b = (ld|st (gep ptr_op idx idx_b)) +// c = (load (gep ptr_op idx (select (cond, idx_a, idx_b)))) +// --> +// a = (ld|st (gep ptr_op idx idx_a)) +// b = (ld|st (gep ptr_op idx idx_b)) +// c = (select (cond, a, b)) +// +bool GVN::loadGEPSelect(LoadInst *LI, MemDepResult Dep) { + Value *Op = LI->getOperand(0); + + GetElementPtrInst *GEPI = dyn_cast(Op); + if (!GEPI) + return false; + + // Check whether the GEP is using the result of a select. + for (unsigned Idx = 0, E = GEPI->getNumOperands(); Idx != E; ++Idx) { + Value *Op = GEPI->getOperand(Idx); + + // Look through casts. + if (CastInst *Cast = dyn_cast(Op)) + Op = Cast->getOperand(0); + + // Must be a select instruction. + if (!isa(Op)) + continue; + + SelectInst *SI = cast(Op); + Value *TrueOp = matchingLdSt(LI, Dep, Idx, SI->getTrueValue(), DT); + Value *FalseOp = matchingLdSt(LI, Dep, Idx, SI->getFalseValue(), DT); + + // All preconditions fulfilled, apply the transformation: + if (TrueOp && FalseOp) { + Value *V = SelectInst::Create(SI->getCondition(), TrueOp, FalseOp, + "sel-pre", LI); + LI->replaceAllUsesWith(V); + + if (V->getType()->getScalarType()->isPointerTy()) + MD->invalidateCachedPointerInfo(V); + markInstructionForDeletion(LI); + ++NumGVNLoad; + return true; + } + } + return false; +} + /// Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. bool GVN::processLoad(LoadInst *L) { @@ -1893,6 +2018,13 @@ } } + // Attempt to eliminate redundant loads whose addresses are dependent on + // the result of a select instruction. If both values have already been + // loaded into registers the load becomes redundant and can be replaced + // with a select on the two values instead. + if (loadGEPSelect(L, Dep)) + return true; + // If the value isn't available, don't do anything! if (Dep.isClobber()) { DEBUG( Index: test/Transforms/GVN/load-gep-select.ll =================================================================== --- /dev/null +++ test/Transforms/GVN/load-gep-select.ll @@ -0,0 +1,147 @@ +; RUN: opt < %s -basicaa -gvn -enable-load-pre -dce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +%struct.s = type { i32 } + +@agg = common global %struct.s** null, align 8 + +; struct s { +; unsigned field; +; }; +; struct s **agg; + +; int test1(int* p, int i, int max){ +; if (p[i] > p[max]) +; max = i; +; return p[max]; +; } + +; CHECK-LABEL: @test1( +; CHECK: [[LD1:%[0-9]+]] = load i32, i32* +; CHECK: [[LD2:%[0-9]+]] = load i32, i32* +; CHECK-NOT: load i32, i32* +; CHECK: %sel-pre = select i1 %cmp, i32 [[LD1]], i32 [[LD2]] +; CHECK: ret i32 + +define i32 @test1(i32* nocapture readonly %p, i32 %i, i32 %max) { +entry: + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %idxprom1 = sext i32 %max to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %p, i64 %idxprom1 + %1 = load i32, i32* %arrayidx2, align 4 + %cmp = icmp sgt i32 %0, %1 + %i.max = select i1 %cmp, i32 %i, i32 %max + %idxprom3 = sext i32 %i.max to i64 + %arrayidx4 = getelementptr inbounds i32, i32* %p, i64 %idxprom3 + %2 = load i32, i32* %arrayidx4, align 4 + ret i32 %2 +} + + +; int test2(int* p, int i, int max){ +; if (p[i] > p[i+1]) +; ++i; +; return p[i]; +; } + +; CHECK-LABEL: @test2( +; CHECK: [[LD1:%[0-9]+]] = load i32, i32* +; CHECK: [[LD2:%[0-9]+]] = load i32, i32* +; CHECK-NOT: load i32, i32* +; CHECK: %sel-pre = select i1 %cmp, i32 [[LD2]], i32 [[LD1]] +; CHECK: ret i32 + +define i32 @test2(i32* nocapture readonly %p, i32 %i, i32 %max) { +entry: + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %i, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %p, i64 %idxprom1 + %1 = load i32, i32* %arrayidx2, align 4 + %cmp = icmp sgt i32 %0, %1 + %add.i = select i1 %cmp, i32 %add, i32 %i + %idxprom3 = sext i32 %add.i to i64 + %arrayidx4 = getelementptr inbounds i32, i32* %p, i64 %idxprom3 + %2 = load i32, i32* %arrayidx4, align 4 + ret i32 %2 +} + +; int test3(int* p, int i, int max, int f){ +; p[i] = f; +; if (f > p[i+1]) +; ++i; +; return p[i]; +;} + +; CHECK-LABEL: @test3( +; CHECK: store i32 %f +; CHECK: [[LD:%[0-9]+]] = load i32, i32* %arrayidx2, align 4 +; CHECK-NOT: load i32, i32* +; CHECK: %sel-pre = select i1 %cmp, i32 [[LD]], i32 %f +; CHECK: ret i32 + +define i32 @test3(i32* nocapture %p, i32 %i, i32 %max, i32 %f) { +entry: + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom + store i32 %f, i32* %arrayidx, align 4 + %add = add nsw i32 %i, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %p, i64 %idxprom1 + %0 = load i32, i32* %arrayidx2, align 4 + %cmp = icmp slt i32 %0, %f + %add.i = select i1 %cmp, i32 %add, i32 %i + %idxprom3 = sext i32 %add.i to i64 + %arrayidx4 = getelementptr inbounds i32, i32* %p, i64 %idxprom3 + %1 = load i32, i32* %arrayidx4, align 4 + ret i32 %1 +} + +; unsigned test4(unsigned i, unsigned max) { +; if (agg[i]->field > agg[i+1]->field) +; ++i; +; return agg[i]->field > max; +; } + +; CHECK-LABEL: @test4( +; CHECK: load %struct.s** +; CHECK: load %struct.s* +; CHECK: [[LD1:%[0-9]+]] = load i32, i32* +; CHECK: load %struct.s* +; CHECK: [[LD2:%[0-9]+]] = load i32, i32* +; CHECK-NOT: load %struct.s* +; CHECK-NOT: load i32, i32* +; CHECK: %sel-pre1 = select i1 %cmp, i32 [[LD2]], i32 [[LD1]] +; CHECK: ret i32 + +define i32 @test4(i32 %i, i32 %max) { +entry: + %idxprom = zext i32 %i to i64 + %0 = load %struct.s**, %struct.s*** @agg, align 8 + %arrayidx = getelementptr inbounds %struct.s*, %struct.s** %0, i64 %idxprom + %1 = load %struct.s*, %struct.s** %arrayidx, align 8 + %field = getelementptr inbounds %struct.s, %struct.s* %1, i64 0, i32 0 + %2 = load i32, i32* %field, align 4 + %add = add i32 %i, 1 + %idxprom1 = zext i32 %add to i64 + %arrayidx2 = getelementptr inbounds %struct.s*, %struct.s** %0, i64 %idxprom1 + %3 = load %struct.s*, %struct.s** %arrayidx2, align 8 + %field3 = getelementptr inbounds %struct.s, %struct.s* %3, i64 0, i32 0 + %4 = load i32, i32* %field3, align 4 + %cmp = icmp ugt i32 %2, %4 + %add.i = select i1 %cmp, i32 %add, i32 %i + %idxprom4 = zext i32 %add.i to i64 + %arrayidx5 = getelementptr inbounds %struct.s*, %struct.s** %0, i64 %idxprom4 + %5 = load %struct.s*, %struct.s** %arrayidx5, align 8 + %field6 = getelementptr inbounds %struct.s, %struct.s* %5, i64 0, i32 0 + %6 = load i32, i32* %field6, align 4 + %cmp7 = icmp ugt i32 %6, %max + %conv = zext i1 %cmp7 to i32 + ret i32 %conv +}