Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -449,6 +449,10 @@
   HelpText<"Pass <arg> to fatbinary invocation">, MetaVarName<"<arg>">;
 def Xcuda_ptxas : Separate<["-"], "Xcuda-ptxas">,
   HelpText<"Pass <arg> to the ptxas assembler">, MetaVarName<"<arg>">;
+def Xopenmp_target : Separate<["-"], "Xopenmp-target">,
+  HelpText<"Pass <arg> to the target offloading toolchain.">, MetaVarName<"<arg>">;
+def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">,
+  HelpText<"Pass <arg> to the target offloading toolchain identified by <triple>.">, MetaVarName<"<triple> <arg>">;
 def z : Separate<["-"], "z">, Flags<[LinkerInput, RenderAsInput]>,
   HelpText<"Pass -z <arg> to the linker">, MetaVarName<"<arg>">,
   Group<Link_Group>;
Index: lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -212,8 +212,20 @@
       static_cast<const toolchains::CudaToolChain &>(getToolChain());
   assert(TC.getTriple().isNVPTX() && "Wrong platform");
 
+  StringRef GPUArchName;
+  std::vector<std::string> GPUArchNames;
+  // If this is an OpenMP action we need to extract the device architecture from
+  // the -march= option.
+  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
+    GPUArchNames = Args.getAllArgValues(options::OPT_march_EQ);
+    assert(GPUArchNames.size() == 1 &&
+           "Exactly one GPU Arch required for ptxas.");
+    GPUArchName = GPUArchNames[0];
+  } else
+    GPUArchName = JA.getOffloadingArch();
+
   // Obtain architecture from the action.
-  CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch());
+  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
   assert(gpu_arch != CudaArch::UNKNOWN &&
          "Device action expected to have an architecture.");
 
@@ -392,6 +404,15 @@
   CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
 }
 
+/// Add a joined -march= argument to \p DAL when \p Opt is spelled
+/// "-march=<arch>"; any other spelling of \p Opt is ignored.
+void AddMArchOption(DerivedArgList *DAL, const OptTable &Opts, StringRef Opt) {
+  // consume_front drops the prefix from our local copy, leaving only <arch>.
+  if (!Opt.consume_front("-march="))
+    return;
+  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Opt);
+}
+
 llvm::opt::DerivedArgList *
 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
                              StringRef BoundArch,
@@ -405,7 +426,7 @@
 
   // For OpenMP device offloading, append derived arguments. Make sure
   // flags are not duplicated.
-  // TODO: Append the compute capability.
+  // Also append the compute capability.
   if (DeviceOffloadKind == Action::OFK_OpenMP) {
     for (Arg *A : Args){
       bool IsDuplicate = false;
@@ -418,6 +439,47 @@
       if (!IsDuplicate)
         DAL->append(A);
     }
+
+    // Get the compute capability from the -Xopenmp-target flags.
+    // The default compute capability is sm_20 since this is a CUDA
+    // tool chain.
+    auto OptList = Args.getAllArgValues(options::OPT_Xopenmp_target_EQ);
+
+    // For each OPT_Xopenmp_target_EQ option, the function returns
+    // two strings, the triple and the option.
+    // The following format is assumed:
+    //
+    // -Xopenmp-target=nvptx64-nvidia-cuda -opt=val
+    for (unsigned i = 0; i < OptList.size(); i+=2) {
+      StringRef Opt = OptList[i+1];
+      if (OptList[i] == getTripleString())
+        AddMArchOption(DAL, Opts, Opt);
+    }
+
+    OptList = Args.getAllArgValues(options::OPT_Xopenmp_target);
+    // When there is only one option in the list, the following format
+    // is assumed:
+    //
+    // -Xopenmp-target -opt=val
+
+    // By default, if no triple is explicitly specified, we
+    // associate -opt=val with the toolchain specified under the
+    // -fopenmp-targets flag (provided that there is only one such
+    // toolchain specified).
+    assert(Args.getAllArgValues(options::OPT_fopenmp_targets_EQ).size() == 1 &&
+        "Target toolchain not specified on -Xopenmp-target and cannot be deduced.");
+
+    // Add arch
+    for (StringRef Opt : OptList) {
+      AddMArchOption(DAL, Opts, Opt);
+    }
+
+    auto MArchList = DAL->getAllArgValues(options::OPT_march_EQ);
+    assert(MArchList.size() < 2 && "At most one GPU arch allowed.");
+    if (MArchList.empty())
+      DAL->AddJoinedArg(nullptr,
+          Opts.getOption(options::OPT_march_EQ), "sm_20");
+
     return DAL;
   }
 
Index: test/Driver/openmp-offload.c
===================================================================
--- test/Driver/openmp-offload.c
+++ test/Driver/openmp-offload.c
@@ -597,3 +597,19 @@
 // RUN:   | FileCheck -check-prefix=CHK-FOPENMP-IS-DEVICE %s
 
 // CHK-FOPENMP-IS-DEVICE: clang{{.*}} "-aux-triple" "powerpc64le-unknown-linux-gnu" {{.*}}.c" "-fopenmp-is-device" "-fopenmp-host-ir-file-path"
+
+/// ###########################################################################
+
+/// Check -Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8 is diagnosed as unused during compilation.
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8 %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-FOPENMP-EQ-TARGET %s
+
+// CHK-FOPENMP-EQ-TARGET: clang{{.*}} argument unused during compilation: '-Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8'
+
+/// ###########################################################################
+
+/// Check -Xopenmp-target -march=pwr8 is diagnosed as unused during compilation.
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target -march=pwr8 %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-FOPENMP-TARGET %s
+
+// CHK-FOPENMP-TARGET: clang{{.*}} argument unused during compilation: '-Xopenmp-target -march=pwr8'