Index: llvm/trunk/docs/NVPTXUsage.rst
===================================================================
--- llvm/trunk/docs/NVPTXUsage.rst
+++ llvm/trunk/docs/NVPTXUsage.rst
@@ -289,7 +289,7 @@
       return my_function_precise(a);
   }
 
-The default value for all unspecified reflection parameters is zero. 
+The default value for all unspecified reflection parameters is zero.
 
 The ``NVVMReflect`` pass should be executed early in the optimization
 pipeline, immediately after the link stage. The ``internalize`` pass is also
@@ -326,6 +326,18 @@
 Therefore, it is recommended that ``NVVMReflect`` is executed early in the
 optimization pipeline before dead-code elimination.
 
+The NVPTX TargetMachine knows how to schedule ``NVVMReflect`` at the beginning
+of your pass manager; just use the following code when setting up your pass
+manager:
+
+.. code-block:: c++
+    std::unique_ptr<TargetMachine> TM = ...;
+    PassManagerBuilder PMBuilder(...);
+    PMBuilder.addExtension(
+        PassManagerBuilder::EP_EarlyAsPossible,
+        [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+          TM->addEarlyAsPossiblePasses(PM);
+        });
 
 Reflection Parameters
 ---------------------
@@ -339,35 +351,16 @@
 ``__CUDA_FTZ=[0,1]`` Use optimized code paths that flush subnormals to zero
 ==================== ======================================================
 
+The value of this flag is determined by the "nvvm-reflect-ftz" module flag.
+The following sets the ftz flag to 1.
 
-Invoking NVVMReflect
---------------------
-
-To ensure that all dead code caused by the reflection pass is eliminated, it
-is recommended that the reflection pass is executed early in the LLVM IR
-optimization pipeline. The pass takes an optional mapping of reflection
-parameter name to an integer value. This mapping can be specified as either a
-command-line option to ``opt`` or as an LLVM ``StringMap<int>`` object when
-programmatically creating a pass pipeline.
-
-With ``opt``:
-
-.. code-block:: text
-
-  # opt -nvvm-reflect -nvvm-reflect-list=<var>=<value>,<var>=<value> module.bc -o module.reflect.bc
-
-
-With programmatic pass pipeline:
-
-.. code-block:: c++
-
-  extern FunctionPass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping);
-
-  StringMap<int> ReflectParams;
-  ReflectParams["__CUDA_FTZ"] = 1;
-  Passes.add(createNVVMReflectPass(ReflectParams));
-
+.. code-block:: llvm
+    !llvm.module.flag = !{!0}
+    !0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
 
+(``i32 4`` indicates that the value set here overrides the value in another
+module we link with.  See the `LangRef <LangRef.html#module-flags-metadata>`
+for details.)
 
 Executing PTX
 =============
Index: llvm/trunk/lib/Target/NVPTX/NVPTX.h
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTX.h
+++ llvm/trunk/lib/Target/NVPTX/NVPTX.h
@@ -48,7 +48,6 @@
 FunctionPass *createNVPTXInferAddressSpacesPass();
 FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
 FunctionPass *createNVVMReflectPass();
-FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);
 MachineFunctionPass *createNVPTXPrologEpilogPass();
 MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
 FunctionPass *createNVPTXImageOptimizerPass();
Index: llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp
+++ llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp
@@ -10,11 +10,10 @@
 // This pass replaces occurrences of __nvvm_reflect("foo") and llvm.nvvm.reflect
 // with an integer.
 //
-// We choose the value we use by looking, in this order, at:
-//
-//  * the -nvvm-reflect-list flag, which has the format "foo=1,bar=42",
-//  * the StringMap passed to the pass's constructor, and
-//  * metadata in the module itself.
+// We choose the value we use by looking at metadata in the module itself.  Note
+// that we intentionally only have one way to choose these values, because other
+// parts of LLVM (particularly, InstCombineCall) rely on being able to predict
+// the values chosen by this pass.
 //
 // If we see an unknown string, we replace its call with 0.
 //
@@ -49,30 +48,17 @@
 
 namespace {
 class NVVMReflect : public FunctionPass {
-private:
-  StringMap<int> VarMap;
-
 public:
   static char ID;
-  NVVMReflect() : NVVMReflect(StringMap<int>()) {}
-
-  NVVMReflect(const StringMap<int> &Mapping)
-      : FunctionPass(ID), VarMap(Mapping) {
+  NVVMReflect() : FunctionPass(ID) {
     initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
-    setVarMap();
   }
 
   bool runOnFunction(Function &) override;
-
-private:
-  void setVarMap();
 };
 }
 
 FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
-FunctionPass *llvm::createNVVMReflectPass(const StringMap<int> &Mapping) {
-  return new NVVMReflect(Mapping);
-}
 
 static cl::opt<bool>
 NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
@@ -83,35 +69,6 @@
                 "Replace occurrences of __nvvm_reflect() calls with 0/1", false,
                 false)
 
-static cl::list<std::string>
-ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"), cl::Hidden,
-            cl::desc("A list of string=num assignments"),
-            cl::ValueRequired);
-
-/// The command line can look as follows :
-/// -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2
-/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
-/// ReflectList vector. First, each of ReflectList[i] is 'split'
-/// using "," as the delimiter. Then each of this part is split
-/// using "=" as the delimiter.
-void NVVMReflect::setVarMap() {
-  for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
-    DEBUG(dbgs() << "Option : "  << ReflectList[i] << "\n");
-    SmallVector<StringRef, 4> NameValList;
-    StringRef(ReflectList[i]).split(NameValList, ',');
-    for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
-      SmallVector<StringRef, 2> NameValPair;
-      NameValList[j].split(NameValPair, '=');
-      assert(NameValPair.size() == 2 && "name=val expected");
-      std::stringstream ValStream(NameValPair[1]);
-      int Val;
-      ValStream >> Val;
-      assert((!(ValStream.fail())) && "integer value expected");
-      VarMap[NameValPair[0]] = Val;
-    }
-  }
-}
-
 bool NVVMReflect::runOnFunction(Function &F) {
   if (!NVVMReflectEnabled)
     return false;
@@ -199,11 +156,10 @@
     DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
 
     int ReflectVal = 0; // The default value is 0
-    auto Iter = VarMap.find(ReflectArg);
-    if (Iter != VarMap.end())
-      ReflectVal = Iter->second;
-    else if (ReflectArg == "__CUDA_FTZ") {
-      // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag.
+    if (ReflectArg == "__CUDA_FTZ") {
+      // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag.  Our
+      // choice here must be kept in sync with AutoUpgrade, which uses the same
+      // technique to detect whether ftz is enabled.
       if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
               F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
         ReflectVal = Flag->getSExtValue();
Index: llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect.ll
===================================================================
--- llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -1,30 +1,38 @@
-; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0
-; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1
+; We run nvvm-reflect (and then optimize) this module twice, once with metadata
+; that enables FTZ, and again with metadata that disables it.
 
-@str = private unnamed_addr addrspace(4) constant [8 x i8] c"USE_MUL\00"
+; RUN: cat %s > %t.noftz
+; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
+; RUN: opt %t.noftz -S -nvvm-reflect -O2 \
+; RUN:   | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
+
+; RUN: cat %s > %t.ftz
+; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
+; RUN: opt %t.ftz -S -nvvm-reflect -O2 \
+; RUN:   | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
+
+@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
 
 declare i32 @__nvvm_reflect(i8*)
 declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
 
+; CHECK-LABEL: @foo
 define float @foo(float %a, float %b) {
-; USE_MUL_0: define float @foo
-; USE_MUL_0-NOT: call i32 @__nvvm_reflect
-; USE_MUL_1: define float @foo
-; USE_MUL_1-NOT: call i32 @__nvvm_reflect
-  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+; CHECK-NOT: call i32 @__nvvm_reflect
+  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
   %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
   %cmp = icmp ugt i32 %reflect, 0
   br i1 %cmp, label %use_mul, label %use_add
 
 use_mul:
-; USE_MUL_1: fmul float %a, %b
-; USE_MUL_0-NOT: fadd float %a, %b
+; USE_FTZ_1: fmul float %a, %b
+; USE_FTZ_0-NOT: fadd float %a, %b
   %ret1 = fmul float %a, %b
   br label %exit
 
 use_add:
-; USE_MUL_0: fadd float %a, %b
-; USE_MUL_1-NOT: fmul float %a, %b
+; USE_FTZ_0: fadd float %a, %b
+; USE_FTZ_1-NOT: fmul float %a, %b
   %ret2 = fadd float %a, %b
   br label %exit
 
@@ -35,14 +43,12 @@
 
 declare i32 @llvm.nvvm.reflect.p0i8(i8*)
 
-; USE_MUL_0: define i32 @intrinsic
-; USE_MUL_1: define i32 @intrinsic
+; CHECK-LABEL: define i32 @intrinsic
 define i32 @intrinsic() {
-; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect
-; USE_MUL_0: ret i32 0
-; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
-; USE_MUL_1: ret i32 1
-  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+; CHECK-NOT: call i32 @llvm.nvvm.reflect
+; USE_FTZ_0: ret i32 0
+; USE_FTZ_1: ret i32 1
+  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
   %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
   ret i32 %reflect
 }
@@ -50,26 +56,24 @@
 ; CUDA-7.0 passes __nvvm_reflect argument slightly differently.
 ; Verify that it works, too
 
-@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"
+@"$str" = private addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
 
+; CHECK-LABEL: @bar
 define float @bar(float %a, float %b) {
-; USE_MUL_0: define float @bar
-; USE_MUL_0-NOT: call i32 @__nvvm_reflect
-; USE_MUL_1: define float @bar
-; USE_MUL_1-NOT: call i32 @__nvvm_reflect
-  %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+; CHECK-NOT: call i32 @__nvvm_reflect
+  %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
   %cmp = icmp ne i32 %reflect, 0
   br i1 %cmp, label %use_mul, label %use_add
 
 use_mul:
-; USE_MUL_1: fmul float %a, %b
-; USE_MUL_0-NOT: fadd float %a, %b
+; USE_FTZ_1: fmul float %a, %b
+; USE_FTZ_0-NOT: fadd float %a, %b
   %ret1 = fmul float %a, %b
   br label %exit
 
 use_add:
-; USE_MUL_0: fadd float %a, %b
-; USE_MUL_1-NOT: fmul float %a, %b
+; USE_FTZ_0: fadd float %a, %b
+; USE_FTZ_1-NOT: fmul float %a, %b
   %ret2 = fadd float %a, %b
   br label %exit
 
@@ -77,3 +81,6 @@
   %ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
   ret float %ret
 }
+
+!llvm.module.flags = !{!0}
+; A module flag is added to the end of this file by the RUN lines at the top.