Index: lib/Transforms/IPO/MergeFunctions.cpp
===================================================================
--- lib/Transforms/IPO/MergeFunctions.cpp
+++ lib/Transforms/IPO/MergeFunctions.cpp
@@ -410,6 +410,9 @@
   int cmpMem(StringRef L, StringRef R) const;
   int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
   int cmpRangeMetadata(const MDNode *L, const MDNode *R) const;
+  int cmpLoopMetadata(const MDNode *L, const MDNode *R) const;
+  int cmpLoopOperandMetadata(const Metadata *L, const Metadata *R) const;
+  int cmpMDString(const MDString *L, const MDString *R) const;
   int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const;
 
   // The two functions undergoing comparison.
@@ -574,6 +577,90 @@
   return 0;
 }
 
+/// Compare two llvm.loop IDs by content. Operand 0 (the self-reference) is
+/// skipped; a null ID orders before any non-null one.
+int FunctionComparator::cmpLoopMetadata(const MDNode *L,
+                                        const MDNode *R) const {
+  if (L == R)
+    return 0; // Neither branch identifies a loop, or both share one ID node.
+  if (!L)
+    return -1;
+  if (!R)
+    return 1;
+  assert(L->getNumOperands() > 0 && L->getOperand(0) == L &&
+         "loop ID should refer to self");
+  assert(R->getNumOperands() > 0 && R->getOperand(0) == R &&
+         "loop ID should refer to self");
+  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+    return Res;
+  // Start at 1: the self-reference exists only for uniqueness, so two IDs that
+  // carry nothing else compare equal and can be merged.
+  for (size_t I = 1; I < L->getNumOperands(); ++I)
+    if (int Res = cmpLoopOperandMetadata(L->getOperand(I), R->getOperand(I)))
+      return Res;
+  return 0;
+}
+
+/// Compare one operand of a loop ID: an MDString, or an MDNode whose operands
+/// are MDStrings and constants. A null operand orders before a non-null one.
+int FunctionComparator::cmpLoopOperandMetadata(const Metadata *L,
+                                               const Metadata *R) const {
+  if (L == R)
+    return 0;
+  if (!L)
+    return -1;
+  if (!R)
+    return 1;
+  if (int Res = cmpNumbers(L->getMetadataID(), R->getMetadataID()))
+    return Res;
+
+  assert((isa<MDString>(L) || isa<MDNode>(L)) &&
+         "loop metadata is either string or node");
+  if (const auto *StrL = dyn_cast<MDString>(L))
+    return cmpMDString(StrL, cast<MDString>(R));
+
+  // Not a string, must be a node.
+  const auto *NodeL = cast<MDNode>(L);
+  const auto *NodeR = cast<MDNode>(R);
+  if (int Res = cmpNumbers(NodeL->getNumOperands(), NodeR->getNumOperands()))
+    return Res;
+
+  for (size_t I = 0; I < NodeL->getNumOperands(); ++I) {
+    const Metadata *LOp = NodeL->getOperand(I);
+    const Metadata *ROp = NodeR->getOperand(I);
+    if (LOp == ROp)
+      continue; // Same operand object (or both null): nothing to order.
+    if (!LOp || !ROp)
+      return LOp ? 1 : -1; // A null operand orders first.
+    if (int Res = cmpNumbers(LOp->getMetadataID(), ROp->getMetadataID()))
+      return Res;
+    assert((isa<MDString>(LOp) || isa<ConstantAsMetadata>(LOp)) &&
+           "loop metadata operand is either string or constant");
+    if (const auto *StrL = dyn_cast<MDString>(LOp)) {
+      if (int Res = cmpMDString(StrL, cast<MDString>(ROp)))
+        return Res;
+      continue;
+    }
+    const Constant *ConstL = cast<ConstantAsMetadata>(LOp)->getValue();
+    const Constant *ConstR = cast<ConstantAsMetadata>(ROp)->getValue();
+    if (int Res = cmpConstants(ConstL, ConstR))
+      return Res;
+  }
+  return 0;
+}
+
+/// Compare two MDStrings by their text; null orders before non-null.
+int FunctionComparator::cmpMDString(const MDString *L,
+                                    const MDString *R) const {
+  if (L == R)
+    return 0;
+  if (!L)
+    return -1;
+  if (!R)
+    return 1;
+  return L->getString().compare(R->getString());
+}
+
 int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
                                                 const Instruction *R) const {
   ImmutableCallSite LCS(L);
@@ -607,6 +694,12 @@
 /// For more details see declaration comments.
int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) const { + if (L == R) + return 0; + if (!L) + return -1; + if (!R) + return 1; Type *TyL = L->getType(); Type *TyR = R->getType(); @@ -1052,6 +1145,11 @@ return cmpNumbers(RMWI->getSynchScope(), cast(R)->getSynchScope()); } + if (const BranchInst *Br = dyn_cast(L)) { + return cmpLoopMetadata( + Br->getMetadata(LLVMContext::MD_loop), + cast(R)->getMetadata(LLVMContext::MD_loop)); + } return 0; } Index: test/Transforms/MergeFunc/loop-metadata-different.ll =================================================================== --- /dev/null +++ test/Transforms/MergeFunc/loop-metadata-different.ll @@ -0,0 +1,54 @@ +; RUN: opt -mergefunc -S < %s | FileCheck %s + +; Test that different llvm.loop metadata prevents merging two functions which +; don't otherwise differ. This is tricky because this type of metadata refer to +; a metadata node that is guaranteed to be separate for each loop. + +; CHECK-LABEL: @enabled( +; CHECK-NOT: call +define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: ; preds = %for.body + %1 = load i32, i32* %a, align 4 + ret i32 %1 +} + +; CHECK-LABEL: @disabled( +; CHECK-NOT: call +define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ 
%indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 + +for.end: ; preds = %for.body + %1 = load i32, i32* %a, align 4 + ret i32 %1 +} + +!0 = !{!0, !1} +!1 = !{!"llvm.loop.vectorize.enable", i1 1} +!2 = !{!2, !3} +!3 = !{!"llvm.loop.vectorize.enable", i1 0} Index: test/Transforms/MergeFunc/loop-metadata-one-missing.ll =================================================================== --- /dev/null +++ test/Transforms/MergeFunc/loop-metadata-one-missing.ll @@ -0,0 +1,53 @@ +; RUN: opt -mergefunc -S < %s | FileCheck %s + +; Test that two loops, one with llvm.loop metadata and one without, prevents +; merging two functions which don't otherwise differ. This is tricky because +; this type of metadata refer to a metadata node that is guaranteed to be +; separate for each loop. 
+ +; CHECK-LABEL: @enabled( +; CHECK-NOT: call +define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: ; preds = %for.body + %1 = load i32, i32* %a, align 4 + ret i32 %1 +} + +; CHECK-LABEL: @nometadata( +; CHECK-NOT: call +define i32 @nometadata(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %1 = load i32, i32* %a, align 4 + ret i32 %1 +} + +!0 = !{!0, !1} +!1 = !{!"llvm.loop.vectorize.enable", i1 1} Index: test/Transforms/MergeFunc/loop-metadata-same.ll =================================================================== --- /dev/null +++ test/Transforms/MergeFunc/loop-metadata-same.ll @@ -0,0 +1,54 @@ +; RUN: opt -mergefunc -S < %s | FileCheck %s + +; Test that same llvm.loop metadata doesn't prevents merging two functions which +; don't otherwise differ. 
This is tricky because this type of metadata refer to +; a metadata node that is guaranteed to be separate for each loop. + +; CHECK-LABEL: @enabled1( +; CHECK-NOT: call +define i32 @enabled1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: ; preds = %for.body + %1 = load i32, i32* %a, align 4 + ret i32 %1 +} + +; CHECK-LABEL: @enabled2( +; CHECK: tail call i32 @enabled1 +define i32 @enabled2(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 + +for.end: ; preds = %for.body + %1 = load i32, i32* %a, align 4 + ret i32 %1 +} + +!0 = !{!0, !1} +!1 = !{!"llvm.loop.vectorize.enable", i1 1} +!2 = !{!2, !3} +!3 = !{!"llvm.loop.vectorize.enable", i1 1} Index: test/Transforms/MergeFunc/loop-metadata-self-ref.ll =================================================================== --- /dev/null +++ 
test/Transforms/MergeFunc/loop-metadata-self-ref.ll @@ -0,0 +1,53 @@ +; RUN: opt -mergefunc -S < %s | FileCheck %s + +; Test that self-referencing llvm.loop metadata without other content doesn't +; prevent merging two functions which don't otherwise differ. This is tricky +; because this type of metadata refer to a metadata node that is guaranteed to +; be separate for each loop. + +; CHECK-LABEL: @loop1( +; CHECK-NOT: call +define i32 @loop1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: ; preds = %for.body + %1 = load i32, i32* %a, align 4 + ret i32 %1 +} + +; CHECK-LABEL: @loop2( +; CHECK: tail call i32 @loop1 +define i32 @loop2(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: ; preds = %for.body + %1 = load i32, i32* %a, align 4 + ret i32 %1 +} + +!0 = !{!0} +!1 = !{!1}