Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -135,6 +135,8 @@
     ImmTyDA,
     ImmTyR128,
     ImmTyLWE,
+    ImmTyExpCompr,
+    ImmTyExpVM,
     ImmTyHwreg,
     ImmTySendMsg,
   };
@@ -228,6 +230,8 @@
   bool isDA() const { return isImmTy(ImmTyDA); }
-  bool isR128() const { return isImmTy(ImmTyUNorm); }
+  bool isR128() const { return isImmTy(ImmTyR128); }
   bool isLWE() const { return isImmTy(ImmTyLWE); }
+  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
+  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
   bool isOffen() const { return isImmTy(ImmTyOffen); }
   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
@@ -484,6 +488,8 @@
     case ImmTyDA: OS << "DA"; break;
     case ImmTyR128: OS << "R128"; break;
     case ImmTyLWE: OS << "LWE"; break;
+    case ImmTyExpCompr: OS << "ExpCompr"; break;
+    case ImmTyExpVM: OS << "ExpVM"; break;
     case ImmTyHwreg: OS << "Hwreg"; break;
     case ImmTySendMsg: OS << "SendMsg"; break;
     }
@@ -745,6 +751,8 @@
   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
+  AMDGPUOperand::Ptr defaultExpCompr() const;
+  AMDGPUOperand::Ptr defaultExpVM() const;
 
   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
 
@@ -2532,6 +2540,17 @@
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
 }
 
+// Default exp "compr" operand: an immediate 0 tagged ImmTyExpCompr.
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultExpCompr() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(),
+                                  AMDGPUOperand::ImmTyExpCompr);
+}
+
+// Default exp "vm" operand: an immediate 0 tagged ImmTyExpVM.
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultExpVM() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyExpVM);
+}
+
 //===----------------------------------------------------------------------===//
 // smrd
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
===================================================================
--- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -78,8 +78,13 @@
                raw_ostream &O);
   void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                  raw_ostream &O);
   void printLWE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                 raw_ostream &O);
+  void printExpCompr(const MCInst *MI, unsigned OpNo,
+                     const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpVM(const MCInst *MI, unsigned OpNo,
+                  const MCSubtargetInfo &STI, raw_ostream &O);
+
   void printRegOperand(unsigned RegNo, raw_ostream &O);
   void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                    raw_ostream &O);
@@ -116,6 +121,25 @@
                           const MCSubtargetInfo &STI, raw_ostream &O);
   void printMemOperand(const MCInst *MI, unsigned OpNo,
                        const MCSubtargetInfo &STI, raw_ostream &O);
+
+  /// Prints exp source operand N: the register, or "off" when bit N of the
+  /// instruction's "en" operand is clear.
+  template <unsigned N>
+  void printExpSrcN(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpSrc0(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpSrc1(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpSrc2(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpSrc3(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  /// Prints the exp target operand by name (mrt0-7, mrtz, null, pos0-3,
+  /// param0-31); other values are printed as invalid_target_<n>.
+  void printExpTgt(const MCInst *MI, unsigned OpNo,
+                   const MCSubtargetInfo &STI, raw_ostream &O);
+
   static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
                          StringRef Asm, StringRef Default = "");
   static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -195,6 +195,24 @@
   printNamedBit(MI, OpNo, O, "lwe");
 }
 
+// Prints the exp "compr" modifier: " compr" when the immediate operand at
+// OpNo is non-zero, nothing otherwise.
+void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  if (MI->getOperand(OpNo).getImm())
+    O << " compr";
+}
+
+// Prints the exp "vm" modifier: " vm" when the immediate operand at OpNo is
+// non-zero, nothing otherwise.
+void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI,
+                                   raw_ostream &O) {
+  if (MI->getOperand(OpNo).getImm())
+    O << " vm";
+}
+
 void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
                                         const MCRegisterInfo &MRI) {
   switch (RegNo) {
@@ -599,10 +617,81 @@
   }
 }
 
-void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNo,
+// Prints exp source operand N: the register at OpNo when bit N of the
+// instruction's "en" operand is set, and the literal "off" otherwise.
+template <unsigned N>
+void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  // NOTE(review): the result of getNamedOperandIdx is used unchecked, so this
+  // assumes every opcode printed here carries an "en" operand.
+  int EnIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::en);
+  unsigned En = MI->getOperand(EnIdx).getImm();
+
+  // FIXME: What do we do with compr? The meaning of en changes depending on
+  // whether compr is set.
+
+  if (En & (1 << N))
+    printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
+  else
+    O << "off";
+}
+
+// printExpSrc0..3 are the non-template print methods; each forwards to
+// printExpSrcN with its own source index.
+void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  printExpSrcN<0>(MI, OpNo, STI, O);
+}
+
+void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  printExpSrcN<1>(MI, OpNo, STI, O);
+}
+
+void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  printExpSrcN<2>(MI, OpNo, STI, O);
+}
+
+void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  printExpSrcN<3>(MI, OpNo, STI, O);
+}
+
+// Prints the exp target operand symbolically, with a leading space: mrt0-7,
+// mrtz (8), null (9), pos0-3 (12-15) or param0-31 (32-63). Any other value
+// is printed as invalid_target_<n>.
+void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
+                                    const MCSubtargetInfo &STI,
+                                    raw_ostream &O) {
+  // This is really a 6 bit field.
+  uint32_t Tgt = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
+
+  if (Tgt <= 7)
+    O << " mrt" << Tgt;
+  else if (Tgt == 8)
+    O << " mrtz";
+  else if (Tgt == 9)
+    O << " null";
+  else if (Tgt >= 12 && Tgt <= 15)
+    O << " pos" << Tgt - 12;
+  else if (Tgt >= 32 && Tgt <= 63)
+    O << " param" << Tgt - 32;
+  else {
+    // Reserved values 10, 11 and 16-31.
+    O << " invalid_target_" << Tgt;
+  }
+}
+
+void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
                                          const MCSubtargetInfo &STI,
                                          raw_ostream &O) {
-  unsigned Imm = MI->getOperand(OpNo).getImm();
+  unsigned Imm = MI->getOperand(OpNum).getImm();
 
   if (Imm == 2) {
     O << "P0";
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2705,9 +2705,9 @@
     const SDValue Ops[] = {
       Chain,
       DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8),
-      DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i32), // TODO: i1
+      DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1),
       DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8),
-      DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i32), // TODO: i1
+      DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i1),
       Op.getOperand(7), // src0
       Op.getOperand(8), // src1
       Op.getOperand(9), // src2
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -385,6 +385,8 @@
 def da : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
 def r128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
 def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
+def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
+def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
 
 def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
 
@@ -400,6 +402,10 @@
 
 def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
 
+def exp_tgt : Operand<i8> { // exp target operand
+  let PrintMethod = "printExpTgt"; // prints mrtN/mrtz/null/posN/paramN
+}
+
 } // End OperandType = "OPERAND_IMMEDIATE"
 
 
@@ -520,10 +526,11 @@
 
 class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
   (outs),
-  (ins i8imm:$tgt, VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3,
-       i32imm:$vm, i32imm:$compr, i8imm:$en),
-  "exp $en, $tgt, $compr, "#!if(done, "1", "0")#", $vm, $src0, $src1, $src2, $src3",
-  [(node (i8 timm:$en), (i32 timm:$vm), (i8 timm:$tgt), (i32 timm:$compr),
+  (ins exp_tgt:$tgt, // $tgt/$compr/$vm print their own leading space
+       ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
+       exp_vm:$vm, exp_compr:$compr, i8imm:$en), // $en: see printExpSrcN
+  "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
+  [(node (i8 timm:$en), (i1 timm:$vm), (i8 timm:$tgt), (i1 timm:$compr),
         f32:$src0, f32:$src1, f32:$src2, f32:$src3)]
 >;
 
Index: lib/Target/AMDGPU/SIRegisterInfo.td
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.td
+++ lib/Target/AMDGPU/SIRegisterInfo.td
@@ -431,3 +431,24 @@
 //===----------------------------------------------------------------------===//
 
 defm VCSrc : RegInlineOperand<"VS", "VCSrc">;
+
+//===----------------------------------------------------------------------===//
+// ExpSrc*: special cases for exp source operands, which are printed as
+// "off" depending on the en operand.
+//===----------------------------------------------------------------------===//
+
+def ExpSrc0 : RegisterOperand<VGPR_32> {
+  let PrintMethod = "printExpSrc0";
+}
+
+def ExpSrc1 : RegisterOperand<VGPR_32> {
+  let PrintMethod = "printExpSrc1";
+}
+
+def ExpSrc2 : RegisterOperand<VGPR_32> {
+  let PrintMethod = "printExpSrc2";
+}
+
+def ExpSrc3 : RegisterOperand<VGPR_32> {
+  let PrintMethod = "printExpSrc3";
+}
Index: test/CodeGen/AMDGPU/llvm.SI.export.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.SI.export.ll
@@ -0,0 +1,251 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
+
+; Operand order of llvm.SI.export as exercised below:
+;   en, vm, done, tgt, compr, src0, src1, src2, src3
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #0
+
+; GCN-LABEL: {{^}}test_export_zeroes:
+; GCN: exp mrt0 off, off, off, off{{$}}
+; GCN: exp mrt0 off, off, off, off done{{$}}
+define void @test_export_zeroes() #0 {
+  call void @llvm.SI.export(i32 0, i32 0, i32 0, i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0)
+  call void @llvm.SI.export(i32 0, i32 0, i32 1, i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0)
+  ret void
+}
+
+; FIXME: Should not set up registers for the unused source registers.
+
+; GCN-LABEL: {{^}}test_export_en_src0:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
+define void @test_export_en_src0() #0 {
+  call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_en_src1:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
+define void @test_export_en_src1() #0 {
+  call void @llvm.SI.export(i32 2, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_en_src2:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
+define void @test_export_en_src2() #0 {
+  call void @llvm.SI.export(i32 4, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_en_src3:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
+define void @test_export_en_src3() #0 {
+  call void @llvm.SI.export(i32 8, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_en_src0_src1:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
+define void @test_export_en_src0_src1() #0 {
+  call void @llvm.SI.export(i32 3, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_en_src0_src2:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
+define void @test_export_en_src0_src2() #0 {
+  call void @llvm.SI.export(i32 5, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_en_src0_src3:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
+; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
+define void @test_export_en_src0_src3() #0 {
+  call void @llvm.SI.export(i32 9, i32 0, i32 0, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 9, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+define void @test_export_en_src0_src1_src2_src3() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_mrt7:
+; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0.5
+; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
+; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
+define void @test_export_mrt7() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 7, i32 0, float 0.5, float 0.5, float 0.5, float 0.5)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 7, i32 0, float 0.5, float 0.5, float 0.5, float 0.5)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_z:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+define void @test_export_z() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 8, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 8, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_null:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+define void @test_export_null() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 9, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 9, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_reserved10:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+define void @test_export_reserved10() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 10, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 10, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_reserved11:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+define void @test_export_reserved11() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 11, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 11, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_pos0:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+define void @test_export_pos0() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_pos3:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+define void @test_export_pos3() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 15, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 15, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_param0:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+define void @test_export_param0() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 32, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_param31:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+define void @test_export_param31() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 63, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_vm:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
+; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
+define void @test_export_vm() #0 {
+  call void @llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_export_compr:
+; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
+; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
+; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
+; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] compr{{$}}
+; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done compr{{$}}
+define void @test_export_compr() #0 {
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0)
+  ret void
+}
+
+attributes #0 = { nounwind "ShaderType"="0" }
Index: test/CodeGen/AMDGPU/ret.ll
===================================================================
--- test/CodeGen/AMDGPU/ret.ll
+++ test/CodeGen/AMDGPU/ret.ll
@@ -6,7 +6,7 @@
 ; GCN-LABEL: {{^}}vgpr:
 ; GCN: v_mov_b32_e32 v1, v0
 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
-; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
+; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
 ; GCN: s_waitcnt expcnt(0)
 ; GCN-NOT: s_endpgm
 define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
@@ -19,7 +19,8 @@
 
 ; GCN-LABEL: {{^}}vgpr_literal:
 ; GCN: v_mov_b32_e32 v4, v0
-; GCN: exp 15, 0, 1, 1, 1, v4, v4, v4, v4
+; GCN: exp mrt0 v4, v4, v4, v4 done compr vm
+
 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
@@ -43,7 +44,6 @@
 ; GCN: v_mov_b32_e32 v3, v4
 ; GCN: v_mov_b32_e32 v4, v6
 ; GCN-NOT: s_endpgm
-attributes #0 = { "InitialPSInputAddr"="0" }
 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
   %i0 = extractelement <2 x i32> %4, i32 0
   %i1 = extractelement <2 x i32> %4, i32 1
@@ -209,7 +209,7 @@
 
 ; GCN-LABEL: {{^}}both:
 ; GCN: v_mov_b32_e32 v1, v0
-; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
+; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
 ; GCN-DAG: s_add_i32 s0, s3, 2
 ; GCN-DAG: s_mov_b32 s1, s2
@@ -231,7 +231,8 @@
 
 ; GCN-LABEL: {{^}}structure_literal:
 ; GCN: v_mov_b32_e32 v3, v0
-; GCN: exp 15, 0, 1, 1, 1, v3, v3, v3, v3
+; GCN: exp mrt0 v3, v3, v3, v3 done compr vm
+
 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
 ; GCN-DAG: s_mov_b32 s0, 2
 ; GCN-DAG: s_mov_b32 s1, 3
@@ -242,3 +243,5 @@
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
   ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
 }
+
+attributes #0 = { nounwind "InitialPSInputAddr"="0" }
Index: test/CodeGen/AMDGPU/skip-if-dead.ll
===================================================================
--- test/CodeGen/AMDGPU/skip-if-dead.ll
+++ test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -106,7 +106,7 @@
 ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
 ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
 ; CHECK-NEXT: ; BB#2:
-; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
+; CHECK-NEXT: exp null off, off, off, off done vm
 ; CHECK-NEXT: s_endpgm
 
 ; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
@@ -158,7 +158,7 @@
 ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
 
 ; CHECK-NEXT: ; BB#2:
-; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
+; CHECK-NEXT: exp null off, off, off, off done vm
 ; CHECK-NEXT: s_endpgm
 
 ; CHECK-NEXT: {{^}}[[SPLIT_BB]]: