Index: llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp
+++ llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp
@@ -397,12 +397,12 @@
   int cmpTypes(Type *TyL, Type *TyR) const;
 
   int cmpNumbers(uint64_t L, uint64_t R) const;
-
   int cmpAPInts(const APInt &L, const APInt &R) const;
   int cmpAPFloats(const APFloat &L, const APFloat &R) const;
   int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
   int cmpMem(StringRef L, StringRef R) const;
   int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+  int cmpRangeMetadata(const MDNode* L, const MDNode* R) const;
 
   // The two functions undergoing comparison.
   const Function *FnL, *FnR;
@@ -481,13 +481,21 @@
 }
 
 int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
-  // TODO: This correctly handles all existing fltSemantics, because they all
-  // have different precisions. This isn't very robust, however, if new types
-  // with different exponent ranges are introduced.
+  // Floats are ordered first by semantics (i.e. float, double, half, etc.),
+  // then by value interpreted as a bitstring (aka APInt).
   const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
   if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
                            APFloat::semanticsPrecision(SR)))
     return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+                           APFloat::semanticsMaxExponent(SR)))
+    return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+                           APFloat::semanticsMinExponent(SR)))
+    return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+                           APFloat::semanticsSizeInBits(SR)))
+    return Res;
   return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
 }
 
@@ -524,6 +532,32 @@
   }
   return 0;
 }
+int FunctionComparator::cmpRangeMetadata(const MDNode* L,
+                                         const MDNode* R) const {
+  if (L == R)
+    return 0;
+  if (!L)
+    return -1;
+  if (!R)
+    return 1;
+  // Range metadata is a sequence of numbers. Make sure they are the same
+  // sequence.
+  // TODO: Note that as this is metadata, it is possible to drop and/or merge
+  // this data when considering functions to merge. Thus this comparison would
+  // return 0 (i.e. equivalent), but merging would become more complicated
+  // because the ranges would need to be unioned. It is not likely that
+  // functions differ ONLY in this metadata if they are actually the same
+  // function semantically.
+  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+    return Res;
+  for (size_t I = 0; I < L->getNumOperands(); ++I) {
+    ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+    ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+      return Res;
+  }
+  return 0;
+}
 
 /// Constants comparison:
 /// 1. Check whether type of L constant could be losslessly bitcasted to R
@@ -607,7 +641,7 @@
     return Res;
 
   if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
-    const auto *SeqR = dyn_cast<ConstantDataSequential>(R);
+    const auto *SeqR = cast<ConstantDataSequential>(R);
     // This handles ConstantDataArray and ConstantDataVector. Note that we
    // compare the two raw data arrays, which might differ depending on the host
    // endianness. This isn't a problem though, because the endiness of a module
@@ -685,10 +719,38 @@
     return 0;
   }
   case Value::BlockAddressVal: {
-    // FIXME: This still uses a pointer comparison. It isn't clear how to remove
-    // this. This only affects programs which take BlockAddresses and store them
-    // as constants, which is limited to interepreters, etc.
-    return cmpNumbers((uint64_t)L, (uint64_t)R);
+    const BlockAddress *LBA = cast<BlockAddress>(L);
+    const BlockAddress *RBA = cast<BlockAddress>(R);
+    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+      return Res;
+    if (LBA->getFunction() == RBA->getFunction()) {
+      // They are BBs in the same function. Order by which comes first in the
+      // BB order of the function. This order is deterministic.
+      Function* F = LBA->getFunction();
+      BasicBlock *LBB = LBA->getBasicBlock();
+      BasicBlock *RBB = RBA->getBasicBlock();
+      if (LBB == RBB)
+        return 0;
+      for(BasicBlock &BB : F->getBasicBlockList()) {
+        if (&BB == LBB) {
+          assert(&BB != RBB);
+          return -1;
+        }
+        if (&BB == RBB)
+          return 1;
+      }
+      llvm_unreachable("Basic Block Address does not point to a basic block in "
+                       "its function.");
+      return -1;
+    } else {
+      // cmpValues said the functions are the same. So because they aren't
+      // literally the same pointer, they must respectively be the left and
+      // right functions.
+      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+      // cmpValues will tell us if these are equivalent BasicBlocks, in the
+      // context of their respective functions.
+      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+    }
   }
   default: // Unknown constant, abort.
     DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
@@ -849,8 +911,8 @@
     if (int Res = cmpNumbers(LI->getSynchScope(),
                              cast<LoadInst>(R)->getSynchScope()))
       return Res;
-    return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range),
-                      (uint64_t)cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
+    return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
+        cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
   }
   if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
     if (int Res =
@@ -873,9 +935,9 @@
     if (int Res = cmpAttrs(CI->getAttributes(),
                            cast<CallInst>(R)->getAttributes()))
       return Res;
-    return cmpNumbers(
-        (uint64_t)CI->getMetadata(LLVMContext::MD_range),
-        (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
+    return cmpRangeMetadata(
+        CI->getMetadata(LLVMContext::MD_range),
+        cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
   }
   if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
     if (int Res = cmpNumbers(CI->getCallingConv(),
@@ -884,9 +946,9 @@
     if (int Res = cmpAttrs(CI->getAttributes(),
                            cast<InvokeInst>(R)->getAttributes()))
      return Res;
-    return cmpNumbers(
-        (uint64_t)CI->getMetadata(LLVMContext::MD_range),
-        (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+    return cmpRangeMetadata(
+        CI->getMetadata(LLVMContext::MD_range),
+        cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
   }
   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
     ArrayRef<unsigned> LIndices = IVI->getIndices();
Index: llvm/trunk/test/Transforms/MergeFunc/merge-block-address-other-function.ll
===================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-block-address-other-function.ll
+++ llvm/trunk/test/Transforms/MergeFunc/merge-block-address-other-function.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 3, i32* %retval
+  br label %return
+
+if.end:
+  %1 = load i32, i32* %i.addr, align 4
+  %cmp1 = icmp eq i32 %1, 3
+  br i1 %cmp1, label %if.then.2, label %if.end.3
+
+if.then.2:
+  store i32 56, i32* %retval
+  br label %return
+
+if.end.3:
+  store i32 0, i32* %retval
+  br label %return
+
+return:
+  %2 = load i32, i32* %retval
+  ret i32 %2
+}
+
+
+define internal i8* @Afunc(i32* %P) {
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
+
+define internal i8* @Bfunc(i32* %P) {
+; CHECK-NOT: @Bfunc
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
Index: llvm/trunk/test/Transforms/MergeFunc/merge-block-address.ll
===================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-block-address.ll
+++ llvm/trunk/test/Transforms/MergeFunc/merge-block-address.ll
@@ -0,0 +1,91 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; These two functions are identical. The basic block labels are the same, and
+; induce the same CFG. We are testing that block addresses within different
+; functions are compared by their value, and not based on order. Both functions
+; come from the same C-code, but in the first the two val_0/val_1 basic blocks
+; are in a different order (they were manually switched post-compilation).
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+  store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
+define i32 @_Z1gi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1gi
+; CHECK-NEXT: tail call i32 @_Z1fi
+; CHECK-NEXT: ret
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+  store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
Index: llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
===================================================================
--- llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
+++ llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
@@ -0,0 +1,96 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; There is a slight difference between these two functions: the label values
+; are switched. They are thus not mergeable. This tests that block addresses
+; referring to blocks within each respective compared function are correctly
+; ordered.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1fi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1fi
+; CHECK-NEXT: entry:
+; CHECK-NEXT: alloca
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+; Right here, this is val_0, and later the if might assign val_1
+  store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1gi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1gi
+; CHECK-NEXT: entry:
+; CHECK-NEXT: alloca
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+; This time, we store val_1 initially, and later the if might assign val_0
+  store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_1, label %val_0]
+}
+
Index: llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
===================================================================
--- llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
+++ llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
@@ -0,0 +1,61 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; We should not merge these two functions, because the blocks are different.
+; This tests the handling of block addresses from different functions.
+; ModuleID = ''
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+define internal i8* @Afunc(i32* %P) {
+; CHECK-LABEL: @Afunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then)
+}
+
+define internal i8* @Bfunc(i32* %P) {
+; CHECK-LABEL: @Bfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
+
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 3, i32* %retval
+  br label %return
+
+if.end:
+  %1 = load i32, i32* %i.addr, align 4
+  %cmp1 = icmp eq i32 %1, 3
+  br i1 %cmp1, label %if.then.2, label %if.end.3
+
+if.then.2:
+  store i32 56, i32* %retval
+  br label %return
+
+if.end.3:
+  store i32 0, i32* %retval
+  br label %return
+
+return:
+  %2 = load i32, i32* %retval
+  ret i32 %2
+}
Index: llvm/trunk/test/Transforms/MergeFunc/ranges-multiple.ll
===================================================================
--- llvm/trunk/test/Transforms/MergeFunc/ranges-multiple.ll
+++ llvm/trunk/test/Transforms/MergeFunc/ranges-multiple.ll
@@ -0,0 +1,44 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+define i1 @cmp_with_range(i8*, i8*) {
+  %v1 = load i8, i8* %0, !range !0
+  %v2 = load i8, i8* %1, !range !0
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_no_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_no_range
+; CHECK-NEXT: %v1 = load i8, i8* %0
+; CHECK-NEXT: %v2 = load i8, i8* %1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
+  %v1 = load i8, i8* %0
+  %v2 = load i8, i8* %1
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_different_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_different_range
+; CHECK-NEXT: %v1 = load i8, i8* %0, !range !1
+; CHECK-NEXT: %v2 = load i8, i8* %1, !range !1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
+  %v1 = load i8, i8* %0, !range !1
+  %v2 = load i8, i8* %1, !range !1
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_with_same_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_with_same_range
+; CHECK: tail call i1 @cmp_with_range
+  %v1 = load i8, i8* %0, !range !0
+  %v2 = load i8, i8* %1, !range !0
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+; The comparison must check every element of the range, not just the first pair.
+!0 = !{i8 0, i8 2, i8 21, i8 30}
+!1 = !{i8 0, i8 2, i8 21, i8 25}