Index: llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp
+++ llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp
@@ -397,12 +397,12 @@
   int cmpTypes(Type *TyL, Type *TyR) const;
 
   int cmpNumbers(uint64_t L, uint64_t R) const;
-
   int cmpAPInts(const APInt &L, const APInt &R) const;
   int cmpAPFloats(const APFloat &L, const APFloat &R) const;
   int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
   int cmpMem(StringRef L, StringRef R) const;
   int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+  int cmpRangeMetadata(const MDNode* L, const MDNode* R) const;
 
   // The two functions undergoing comparison.
   const Function *FnL, *FnR;
@@ -481,13 +481,21 @@
 }
 
 int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
-  // TODO: This correctly handles all existing fltSemantics, because they all
-  // have different precisions. This isn't very robust, however, if new types
-  // with different exponent ranges are introduced.
+  // Floats are ordered first by semantics (i.e. float, double, half, etc.),
+  // then by value interpreted as a bitstring (aka APInt).
   const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
   if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
                            APFloat::semanticsPrecision(SR)))
     return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+                           APFloat::semanticsMaxExponent(SR)))
+    return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+                           APFloat::semanticsMinExponent(SR)))
+    return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+                           APFloat::semanticsSizeInBits(SR)))
+    return Res;
   return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
 }
 
@@ -524,6 +532,32 @@
   }
   return 0;
 }
+int FunctionComparator::cmpRangeMetadata(const MDNode* L,
+                                         const MDNode* R) const {
+  if (L == R)
+    return 0;
+  if (!L)
+    return -1;
+  if (!R)
+    return 1;
+  // Range metadata is a sequence of numbers. Make sure they are the same
+  // sequence.
+  // TODO: Note that as this is metadata, it is possible to drop and/or merge
+  // this data when considering functions to merge. Thus this comparison would
+  // return 0 (i.e. equivalent), but merging would become more complicated
+  // because the ranges would need to be unioned. It is not likely that
+  // functions differ ONLY in this metadata if they are actually the same
+  // function semantically.
+  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+    return Res;
+  for (size_t I = 0; I < L->getNumOperands(); ++I) {
+    ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+    ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+      return Res;
+  }
+  return 0;
+}
 
 /// Constants comparison:
 /// 1. Check whether type of L constant could be losslessly bitcasted to R
@@ -607,7 +641,7 @@
     return Res;
 
   if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
-    const auto *SeqR = dyn_cast<ConstantDataSequential>(R);
+    const auto *SeqR = cast<ConstantDataSequential>(R);
     // This handles ConstantDataArray and ConstantDataVector. Note that we
    // compare the two raw data arrays, which might differ depending on the host
    // endianness. This isn't a problem though, because the endiness of a module
@@ -685,10 +719,38 @@
     return 0;
   }
   case Value::BlockAddressVal: {
-    // FIXME: This still uses a pointer comparison. It isn't clear how to remove
-    // this. This only affects programs which take BlockAddresses and store them
-    // as constants, which is limited to interepreters, etc.
-    return cmpNumbers((uint64_t)L, (uint64_t)R);
+    const BlockAddress *LBA = cast<BlockAddress>(L);
+    const BlockAddress *RBA = cast<BlockAddress>(R);
+    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+      return Res;
+    if (LBA->getFunction() == RBA->getFunction()) {
+      // They are BBs in the same function. Order by which comes first in the
+      // BB order of the function. This order is deterministic.
+      Function* F = LBA->getFunction();
+      BasicBlock *LBB = LBA->getBasicBlock();
+      BasicBlock *RBB = RBA->getBasicBlock();
+      if (LBB == RBB)
+        return 0;
+      for(BasicBlock &BB : F->getBasicBlockList()) {
+        if (&BB == LBB) {
+          assert(&BB != RBB);
+          return -1;
+        }
+        if (&BB == RBB)
+          return 1;
+      }
+      llvm_unreachable("Basic Block Address does not point to a basic block in "
+                       "its function.");
+      return -1;
+    } else {
+      // cmpValues said the functions are the same. So because they aren't
+      // literally the same pointer, they must respectively be the left and
+      // right functions.
+      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+      // cmpValues will tell us if these are equivalent BasicBlocks, in the
+      // context of their respective functions.
+      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+    }
   }
   default: // Unknown constant, abort.
     DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
@@ -849,8 +911,8 @@
     if (int Res = cmpNumbers(LI->getSynchScope(),
                              cast<LoadInst>(R)->getSynchScope()))
       return Res;
-    return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range),
-                      (uint64_t)cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
+    return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
+        cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
   }
   if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
     if (int Res =
@@ -873,9 +935,9 @@
     if (int Res = cmpAttrs(CI->getAttributes(),
                            cast<CallInst>(R)->getAttributes()))
       return Res;
-    return cmpNumbers(
-        (uint64_t)CI->getMetadata(LLVMContext::MD_range),
-        (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
+    return cmpRangeMetadata(
+        CI->getMetadata(LLVMContext::MD_range),
+        cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
   }
   if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
     if (int Res = cmpNumbers(CI->getCallingConv(),
@@ -884,9 +946,9 @@
     if (int Res = cmpAttrs(CI->getAttributes(),
                            cast<InvokeInst>(R)->getAttributes()))
      return Res;
-    return cmpNumbers(
-        (uint64_t)CI->getMetadata(LLVMContext::MD_range),
-        (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+    return cmpRangeMetadata(
+        CI->getMetadata(LLVMContext::MD_range),
+        cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
   }
   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
     ArrayRef<unsigned> LIndices = IVI->getIndices();
Index: llvm/trunk/test/Transforms/MergeFunc/merge-block-address-other-function.ll
===================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-block-address-other-function.ll
+++ llvm/trunk/test/Transforms/MergeFunc/merge-block-address-other-function.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 3, i32* %retval
+  br label %return
+
+if.end:
+  %1 = load i32, i32* %i.addr, align 4
+  %cmp1 = icmp eq i32 %1, 3
+  br i1 %cmp1, label %if.then.2, label %if.end.3
+
+if.then.2:
+  store i32 56, i32* %retval
+  br label %return
+
+if.end.3:
+  store i32 0, i32* %retval
+  br label %return
+
+return:
+  %2 = load i32, i32* %retval
+  ret i32 %2
+}
+
+
+define internal i8* @Afunc(i32* %P) {
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
+
+define internal i8* @Bfunc(i32* %P) {
+; CHECK-NOT: @Bfunc
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
Index: llvm/trunk/test/Transforms/MergeFunc/merge-block-address.ll
===================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-block-address.ll
+++ llvm/trunk/test/Transforms/MergeFunc/merge-block-address.ll
@@ -0,0 +1,91 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; These two functions are identical. The basic block labels are the same, and
+; induce the same CFG. We are testing that block addresses within different
+; functions are compared by their value, and not based on order. Both functions
+; come from the same C-code, but in the first the two val_0/val_1 basic blocks
+; are in a different order (they were manually switched post-compilation).
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+  store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
+define i32 @_Z1gi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1gi
+; CHECK-NEXT: tail call i32 @_Z1fi
+; CHECK-NEXT: ret
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+  store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
Index: llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
===================================================================
--- llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
+++ llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
@@ -0,0 +1,96 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; There is a slight difference between these two functions: the label values
+; are switched. They are thus not mergeable. This tests that block addresses
+; referring to blocks within each respective compared function are correctly
+; ordered.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1fi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1fi
+; CHECK-NEXT: entry:
+; CHECK-NEXT: alloca
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+; Right here, this is val_0, and later the if might assign val_1
+  store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1gi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1gi
+; CHECK-NEXT: entry:
+; CHECK-NEXT: alloca
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+; This time, we store val_1 initially, and later the if might assign val_0
+  store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_1, label %val_0]
+}
+
Index: llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
===================================================================
--- llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
+++ llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
@@ -0,0 +1,61 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; We should not merge these two functions, because the blocks are different.
+; This tests the handling of block addresses from different functions.
+; ModuleID = ''
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+define internal i8* @Afunc(i32* %P) {
+; CHECK-LABEL: @Afunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then)
+}
+
+define internal i8* @Bfunc(i32* %P) {
+; CHECK-LABEL: @Bfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
+
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 3, i32* %retval
+  br label %return
+
+if.end:
+  %1 = load i32, i32* %i.addr, align 4
+  %cmp1 = icmp eq i32 %1, 3
+  br i1 %cmp1, label %if.then.2, label %if.end.3
+
+if.then.2:
+  store i32 56, i32* %retval
+  br label %return
+
+if.end.3:
+  store i32 0, i32* %retval
+  br label %return
+
+return:
+  %2 = load i32, i32* %retval
+  ret i32 %2
+}
Index: llvm/trunk/test/Transforms/MergeFunc/ranges-multiple.ll
===================================================================
--- llvm/trunk/test/Transforms/MergeFunc/ranges-multiple.ll
+++ llvm/trunk/test/Transforms/MergeFunc/ranges-multiple.ll
@@ -0,0 +1,44 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+define i1 @cmp_with_range(i8*, i8*) {
+  %v1 = load i8, i8* %0, !range !0
+  %v2 = load i8, i8* %1, !range !0
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_no_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_no_range
+; CHECK-NEXT: %v1 = load i8, i8* %0
+; CHECK-NEXT: %v2 = load i8, i8* %1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
+  %v1 = load i8, i8* %0
+  %v2 = load i8, i8* %1
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_different_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_different_range
+; CHECK-NEXT: %v1 = load i8, i8* %0, !range !1
+; CHECK-NEXT: %v2 = load i8, i8* %1, !range !1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
+  %v1 = load i8, i8* %0, !range !1
+  %v2 = load i8, i8* %1, !range !1
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_with_same_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_with_same_range
+; CHECK: tail call i1 @cmp_with_range
+  %v1 = load i8, i8* %0, !range !0
+  %v2 = load i8, i8* %1, !range !0
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+; The comparison must check every element of the range, not just the first pair.
+!0 = !{i8 0, i8 2, i8 21, i8 30}
+!1 = !{i8 0, i8 2, i8 21, i8 25}