Index: lib/Transforms/Scalar/GVN.cpp
===================================================================
--- lib/Transforms/Scalar/GVN.cpp
+++ lib/Transforms/Scalar/GVN.cpp
@@ -608,6 +608,10 @@
     DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
     BumpPtrAllocator TableAllocator;
 
+    // Block-local map of equivalent values to their leader, does not
+    // propagate to any successors. Entries added mid-block are applied
+    // to the remaining instructions in the block.
+    SmallMapVector<Value *, Constant *, 4> ReplaceWithConstMap;
     SmallVector<Instruction *, 8> InstrsToErase;
 
     typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
@@ -699,6 +703,7 @@
     // Helper functions of redundant load elimination
     bool processLoad(LoadInst *L);
     bool processNonLocalLoad(LoadInst *L);
+    bool processAssumeIntrinsic(IntrinsicInst *II);
     void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
                                  AvailValInBlkVect &ValuesPerBlock,
                                  UnavailBlkVect &UnavailableBlocks);
@@ -719,6 +724,7 @@
     void verifyRemoved(const Instruction *I) const;
     bool splitCriticalEdges();
     BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
+    bool replaceOperandsWithConsts(Instruction *I) const;
     bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
     bool processFoldableCondBr(BranchInst *BI);
     void addDeadBlock(BasicBlock *BB);
@@ -1759,6 +1765,38 @@
   return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
 }
 
+bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
+  assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume &&
+         "This function can only be called with llvm.assume intrinsic");
+  Value *V = IntrinsicI->getArgOperand(0);
+  Constant *True = ConstantInt::getTrue(V->getContext());
+  bool Changed = false;
+  for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
+    BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
+
+    // Equality propagation can't be done for every successor,
+    // but propagateEquality checks it.
+    Changed |= propagateEquality(V, True, Edge);
+  }
+
+  if (auto *CmpI = dyn_cast<CmpInst>(V)) {
+    if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ ||
+        CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
+        (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
+         CmpI->getFastMathFlags().noNaNs())) {
+      Value *CmpLHS = CmpI->getOperand(0);
+      Value *CmpRHS = CmpI->getOperand(1);
+      if (isa<Constant>(CmpLHS))
+        std::swap(CmpLHS, CmpRHS);
+      auto *RHSConst = dyn_cast<Constant>(CmpRHS);
+
+      // If only one operand is constant.
+      if (RHSConst != nullptr && !isa<Constant>(CmpLHS))
+        ReplaceWithConstMap[CmpLHS] = RHSConst;
+    }
+  }
+  return Changed;
+}
+
 static void patchReplacementInstruction(Instruction *I, Value *Repl) {
   // Patch the replacement so that it is not more restrictive than the value
@@ -2031,6 +2069,21 @@
   return Pred != nullptr;
 }
 
+// Tries to replace instruction operands with constants, using information
+// from ReplaceWithConstMap.
+bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
+  bool Changed = false;
+  for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
+    Value *Operand = Instr->getOperand(OpNum);
+    auto It = ReplaceWithConstMap.find(Operand);
+    if (It != ReplaceWithConstMap.end()) {
+      Instr->setOperand(OpNum, It->second);
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
 /// The given values are known to be equal in every block
 /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
 /// 'RHS' everywhere in the scope. Returns whether a change was made.
@@ -2047,11 +2100,13 @@
     std::pair<Value *, Value *> Item = Worklist.pop_back_val();
     LHS = Item.first; RHS = Item.second;
 
-    if (LHS == RHS) continue;
+    if (LHS == RHS)
+      continue;
     assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
 
     // Don't try to propagate equalities between constants.
-    if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue;
+    if (isa<Constant>(LHS) && isa<Constant>(RHS))
+      continue;
 
     // Prefer a constant on the right-hand side, or an Argument if no constants.
     if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
@@ -2202,6 +2257,10 @@
     return true;
   }
 
+  if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I))
+    if (IntrinsicI->getIntrinsicID() == Intrinsic::assume)
+      return processAssumeIntrinsic(IntrinsicI);
+
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     if (processLoad(LI))
       return true;
@@ -2266,7 +2325,8 @@
 
   // Instructions with void type don't return a value, so there's
   // no point in trying to find redundancies in them.
-  if (I->getType()->isVoidTy()) return false;
+  if (I->getType()->isVoidTy())
+    return false;
 
   uint32_t NextNum = VN.getNextUnusedValueNumber();
   unsigned Num = VN.lookup_or_add(I);
@@ -2373,10 +2433,15 @@
   if (DeadBlocks.count(BB))
     return false;
 
+  // Clear the map before every BB because it can only be used for a single BB.
+  ReplaceWithConstMap.clear();
   bool ChangedFunction = false;
 
   for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
        BI != BE;) {
+    if (!ReplaceWithConstMap.empty())
+      ChangedFunction |= replaceOperandsWithConsts(BI);
+
     ChangedFunction |= processInstruction(BI);
     if (InstrsToErase.empty()) {
       ++BI;
Index: test/Transforms/GVN/assume-ptr-equal.ll
===================================================================
--- /dev/null
+++ test/Transforms/GVN/assume-ptr-equal.ll
@@ -0,0 +1,115 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+%struct.A = type { i32 (...)** }
+@_ZTV1A = available_externally unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3barEv to i8*)], align 8
+@_ZTI1A = external constant i8*
+
+; Checks whether indirect calls can be replaced with direct calls,
+; assuming that %vtable == @_ZTV1A (with alignment).
+
+; Checking const propagation across other BBs
+; CHECK-LABEL: define void @_Z1gb(
+; CHECK: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3barEv(
+
+define void @_Z1gb(i1 zeroext %p) {
+entry:
+  %call = tail call noalias i8* @_Znwm(i64 8) #4
+  %0 = bitcast i8* %call to %struct.A*
+  tail call void @_ZN1AC1Ev(%struct.A* %0) #1
+  %1 = bitcast i8* %call to i8***
+  %vtable = load i8**, i8*** %1, align 8
+  %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+  tail call void @llvm.assume(i1 %cmp.vtables)
+  br i1 %p, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+  %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+  %call2 = tail call i32 %2(%struct.A* %0) #1
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1
+  %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)**
+  %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8
+  %call5 = tail call i32 %3(%struct.A* %0) #1
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+; Checking const propagation in the same BB
+; CHECK-LABEL: define i32 @main()
+; CHECK: call i32 @_ZN1A3fooEv(
+
+define i32 @main() {
+entry:
+  %call = tail call noalias i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.A*
+  tail call void @_ZN1AC1Ev(%struct.A* %0)
+  %1 = bitcast i8* %call to i8***
+  %vtable = load i8**, i8*** %1, align 8
+  %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+  tail call void @llvm.assume(i1 %cmp.vtables)
+  %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+  %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+  %call2 = tail call i32 %2(%struct.A* %0)
+  ret i32 0
+}
+
+; This test checks const propagation with the fcmp instruction.
+; CHECK-LABEL: define float @_Z1gf(float %p)
+; CHECK: ret float 3.000000e+00
+
+define float @_Z1gf(float %p) {
+entry:
+  %p.addr = alloca float, align 4
+  %f = alloca float, align 4
+  store float %p, float* %p.addr, align 4
+
+  store float 3.000000e+00, float* %f, align 4
+  %0 = load float, float* %p.addr, align 4
+  %1 = load float, float* %f, align 4
+  %cmp = fcmp oeq float %1, %0 ; note the constant on the LHS
+  call void @llvm.assume(i1 %cmp)
+
+  ret float %0
+}
+
+; CHECK-LABEL: define float @_Z1hf(float %p)
+; CHECK: ret float 3.000000e+00
+
+define float @_Z1hf(float %p) {
+entry:
+  %p.addr = alloca float, align 4
+  store float %p, float* %p.addr, align 4
+
+  %0 = load float, float* %p.addr, align 4
+  %cmp = fcmp nnan ueq float %0, 3.000000e+00
+  call void @llvm.assume(i1 %cmp)
+
+  ret float %0
+}
+
+; CHECK-LABEL: define float @_Z1if(float %p)
+; CHECK-NOT: ret float 3.000000e+00
+
+define float @_Z1if(float %p) {
+entry:
+  %p.addr = alloca float, align 4
+  store float %p, float* %p.addr, align 4
+
+  %0 = load float, float* %p.addr, align 4
+  %cmp = fcmp ueq float %0, 3.000000e+00 ; no nnan flag - can't propagate
+  call void @llvm.assume(i1 %cmp)
+
+  ret float %0
+}
+
+declare noalias i8* @_Znwm(i64)
+declare void @_ZN1AC1Ev(%struct.A*)
+declare void @llvm.assume(i1)
+declare i32 @_ZN1A3fooEv(%struct.A*)
+declare i32 @_ZN1A3barEv(%struct.A*)
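
For reference, a sketch of the intended effect on the @main test case above. This is illustrative only; the exact value names and formatting of the opt -gvn output may differ, and the authoritative expectations are the FileCheck lines in the test:

; Before GVN, the call goes through the loaded vtable slot:
;   %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
;   %call2 = tail call i32 %2(%struct.A* %0)
;
; After GVN, llvm.assume establishes %vtable == gep(@_ZTV1A, 0, 2), the slot
; load is folded against the constant initializer of @_ZTV1A, and the call
; becomes direct:
;   %call2 = tail call i32 @_ZN1A3fooEv(%struct.A* %0)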