Index: lib/Transforms/Scalar/GVN.cpp =================================================================== --- lib/Transforms/Scalar/GVN.cpp +++ lib/Transforms/Scalar/GVN.cpp @@ -126,6 +126,7 @@ public: ValueTable() : nextValueNumber(1) { } uint32_t lookup_or_add(Value *V); + bool hasValue(Value *V) const; uint32_t lookup(Value *V) const; uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred, Value *LHS, Value *RHS); @@ -468,6 +469,11 @@ return VI->second; } +/// Returns if the value has been numbered or not. +bool ValueTable::hasValue(Value *V) const { + return valueNumbering.find(V) != valueNumbering.end(); +} + /// Returns the value number of the given comparison, /// assigning it a new number if it did not have one before. Useful when /// we deduced the result of a comparison, but don't immediately have an @@ -638,6 +644,29 @@ DominatorTree &getDominatorTree() const { return *DT; } AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } MemoryDependenceAnalysis &getMemDep() const { return *MD; } + + // We try to assign a number to newly created instr only if it would have + // been processed. If there is an existing VN, then the existing value is + // returned. Otherwise, the current value is returned. + Value *findOrAddNewInstruction(Value *Val) { + if (Instruction *I = dyn_cast(Val)) { + // FIXME: if current BB hasn't been processed yet, don't try to assign + // a number. What if Val is the first inst or the first inst is also a + // newly created instruction? + if (!VN.hasValue(&*(I->getParent()->getFirstInsertionPt()))) + return Val; + uint32_t NextNum = VN.getNextUnusedValueNumber(); + unsigned Num = VN.lookup_or_add(I); + if (Num >= NextNum) + addToLeaderTable(Num, I, I->getParent()); + else if (Value *Existing = findLeader(I->getParent(), Num)) + return Existing; + else + addToLeaderTable(Num, I, I->getParent()); + } + return Val; + } + private: /// Push a new Value to the LeaderTable onto the list for its value number. void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { @@ -1124,9 +1153,9 @@ /// that the store provides bits used by the load but we the pointers don't /// mustalias. Check this case to see if there is anything more we can do /// before we give up. -static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, - Type *LoadTy, - Instruction *InsertPt, const DataLayout &DL){ +static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, + Instruction *InsertPt, const DataLayout &DL, + GVN &gvn) { LLVMContext &Ctx = SrcVal->getType()->getContext(); uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; @@ -1137,10 +1166,12 @@ // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (SrcVal->getType()->getScalarType()->isPointerTy()) - SrcVal = Builder.CreatePtrToInt(SrcVal, - DL.getIntPtrType(SrcVal->getType())); + SrcVal = gvn.findOrAddNewInstruction( + Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()))); + if (!SrcVal->getType()->isIntegerTy()) - SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8)); + SrcVal = gvn.findOrAddNewInstruction( + Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8))); // Shift the bits to the least significant depending on endianness. unsigned ShiftAmt; @@ -1150,10 +1181,11 @@ ShiftAmt = (StoreSize-LoadSize-Offset)*8; if (ShiftAmt) - SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt); + SrcVal = gvn.findOrAddNewInstruction(Builder.CreateLShr(SrcVal, ShiftAmt)); if (LoadSize != StoreSize) - SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8)); + SrcVal = gvn.findOrAddNewInstruction( + Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize * 8))); return CoerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL); } @@ -1191,7 +1223,8 @@ DestPTy = PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace()); Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); - PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); + PtrVal = + gvn.findOrAddNewInstruction(Builder.CreateBitCast(PtrVal, DestPTy)); LoadInst *NewLoad = Builder.CreateLoad(PtrVal); NewLoad->takeName(SrcVal); NewLoad->setAlignment(SrcVal->getAlignment()); @@ -1203,9 +1236,10 @@ // system, we need to shift down to get the relevant bits. Value *RV = NewLoad; if (DL.isBigEndian()) - RV = Builder.CreateLShr(RV, - NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); - RV = Builder.CreateTrunc(RV, SrcVal->getType()); + RV = gvn.findOrAddNewInstruction(Builder.CreateLShr( + RV, NewLoadSize * 8 - SrcVal->getType()->getPrimitiveSizeInBits())); + RV = + gvn.findOrAddNewInstruction(Builder.CreateTrunc(RV, SrcVal->getType())); SrcVal->replaceAllUsesWith(RV); // We would like to use gvn.markInstructionForDeletion here, but we can't @@ -1217,15 +1251,14 @@ SrcVal = NewLoad; } - return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL); + return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL, gvn); } - /// This function is called when we have a /// memdep query of a load that ends up being a clobbering mem intrinsic. static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Type *LoadTy, Instruction *InsertPt, - const DataLayout &DL){ + const DataLayout &DL, GVN &gvn) { LLVMContext &Ctx = LoadTy->getContext(); uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8; @@ -1238,23 +1271,24 @@ // independently of what the offset is. Value *Val = MSI->getValue(); if (LoadSize != 1) - Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8)); - + Val = gvn.findOrAddNewInstruction( + Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize * 8))); Value *OneElt = Val; // Splat the value out to the right number of bits. for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) { // If we can double the number of bytes set, do it. if (NumBytesSet*2 <= LoadSize) { - Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8); - Val = Builder.CreateOr(Val, ShVal); + Value *ShVal = gvn.findOrAddNewInstruction( + Builder.CreateShl(Val, NumBytesSet * 8)); + Val = gvn.findOrAddNewInstruction(Builder.CreateOr(Val, ShVal)); NumBytesSet <<= 1; continue; } // Otherwise insert one byte at a time. - Value *ShVal = Builder.CreateShl(Val, 1*8); - Val = Builder.CreateOr(OneElt, ShVal); + Value *ShVal = gvn.findOrAddNewInstruction(Builder.CreateShl(Val, 1 * 8)); + Val = gvn.findOrAddNewInstruction(Builder.CreateOr(OneElt, ShVal)); ++NumBytesSet; } @@ -1321,7 +1355,8 @@ if (isSimpleValue()) { Res = getSimpleValue(); if (Res->getType() != LoadTy) { - Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), DL); + Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), DL, + gvn); DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' @@ -1341,10 +1376,10 @@ } } else if (isMemIntrinValue()) { Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, - BB->getTerminator(), DL); + BB->getTerminator(), DL, gvn); DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset - << " " << *getMemIntrinValue() << '\n' - << *Res << '\n' << "\n\n\n"); + << " " << *getMemIntrinValue() << '\n' << *Res << '\n' + << "\n\n\n"); } else { assert(isUndefValue() && "Should be UndefVal"); DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); @@ -1901,7 +1936,7 @@ L->getType(), L->getPointerOperand(), DepSI); if (Offset != -1) AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, - L->getType(), L, DL); + L->getType(), L, DL, *this); } // Check to see if we have something like this: @@ -1926,7 +1961,8 @@ int Offset = AnalyzeLoadFromClobberingMemInst( L->getType(), L->getPointerOperand(), DepMI, DL); if (Offset != -1) - AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, DL); + AvailVal = + GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, DL, *this); } if (AvailVal) { Index: test/Transforms/GVN/pr25440.ll =================================================================== --- /dev/null +++ test/Transforms/GVN/pr25440.ll @@ -0,0 +1,108 @@ +;RUN: opt -gvn -S < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64" +target triple = "thumbv7--linux-gnueabi" + +%struct.a = type { i16, i16, [1 x %union.a] } +%union.a = type { i32 } + +@length = external global [0 x i32], align 4 + +; Function Attrs: nounwind +define fastcc void @foo(%struct.a* nocapture readonly %x) { +;CHECK-LABEL: foo +entry: + br label %bb0 + +bb0: ; preds = %land.lhs.true, %entry +;CHECK: bb0: + %x.tr = phi %struct.a* [ %x, %entry ], [ null, %land.lhs.true ] + %code1 = getelementptr inbounds %struct.a, %struct.a* %x.tr, i32 0, i32 0 + %0 = load i16, i16* %code1, align 4 +; CHECK: load i32, i32* + %conv = zext i16 %0 to i32 + switch i32 %conv, label %if.end.50 [ + i32 43, label %cleanup + i32 52, label %if.then.5 + ] + +if.then.5: ; preds = %bb0 + br i1 undef, label %land.lhs.true, label %if.then.26 + +land.lhs.true: ; preds = %if.then.5 + br i1 undef, label %cleanup, label %bb0 + +if.then.26: ; preds = %if.then.5 + %x.tr.lcssa163 = phi %struct.a* [ %x.tr, %if.then.5 ] + br i1 undef, label %cond.end, label %cond.false + +cond.false: ; preds = %if.then.26 +; CHECK: cond.false: +; CHECK-NOT: load + %mode = getelementptr inbounds %struct.a, %struct.a* %x.tr.lcssa163, i32 0, i32 1 + %bf.load = load i16, i16* %mode, align 2 + %bf.shl = shl i16 %bf.load, 8 + br label %cond.end + +cond.end: ; preds = %cond.false, %if.then.26 + br i1 undef, label %if.then.44, label %cleanup + +if.then.44: ; preds = %cond.end + unreachable + +if.end.50: ; preds = %bb0 +;%CHECK: if.end.50: + %conv.lcssa = phi i32 [ %conv, %bb0 ] + %arrayidx52 = getelementptr inbounds [0 x i32], [0 x i32]* @length, i32 0, i32 %conv.lcssa + %1 = load i32, i32* %arrayidx52, align 4 + br i1 undef, label %for.body.57, label %cleanup + +for.body.57: ; preds = %if.end.50 + %i.2157 = add nsw i32 %1, -1 + unreachable + +cleanup: ; preds = %if.end.50, %cond.end, %land.lhs.true, %bb0 + ret void +} + +@yy_c_buf_p = external unnamed_addr global i8*, align 4 +@dfg_text = external global i8*, align 4 + +define void @dfg_lex() { +;CHECK-LABEL: dfg_lex +entry: + br label %while.bodythread-pre-split + +while.bodythread-pre-split: ; preds = %while.end, %while.end, %entry + br i1 undef, label %if.then.14, label %if.end.15 + +if.then.14: ; preds = %while.end, %while.bodythread-pre-split + %v1 = load i32, i32* bitcast (i8** @dfg_text to i32*), align 4 + %sub.ptr.sub = sub i32 undef, %v1 + br label %if.end.15 + +if.end.15: ; preds = %if.then.14, %while.bodythread-pre-split + %v2 = load i8*, i8** @yy_c_buf_p, align 4 + br label %while.cond.16 + +while.cond.16: ; preds = %while.cond.16, %if.end.15 + br i1 undef, label %while.cond.16, label %while.end + +while.end: ; preds = %while.cond.16 + %add.ptr = getelementptr inbounds i8, i8* %v2, i32 undef + store i8* %add.ptr, i8** @dfg_text, align 4 + %sub.ptr.rhs.cast25 = ptrtoint i8* %add.ptr to i32 + %sub.ptr.sub26 = sub i32 0, %sub.ptr.rhs.cast25 + switch i32 undef, label %sw.default [ + i32 65, label %while.bodythread-pre-split + i32 3, label %return + i32 57, label %while.bodythread-pre-split + i32 60, label %if.then.14 + ] + +sw.default: ; preds = %while.end + unreachable + +return: ; preds = %while.end + ret void +}