Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -126,6 +126,8 @@ ImmTyDA, ImmTyR128, ImmTyLWE, + ImmTyExpCompr, + ImmTyExpVM, ImmTyHwreg, ImmTySendMsg, }; @@ -217,6 +219,8 @@ bool isDA() const { return isImmTy(ImmTyDA); } - bool isR128() const { return isImmTy(ImmTyUNorm); } + bool isR128() const { return isImmTy(ImmTyR128); } bool isLWE() const { return isImmTy(ImmTyLWE); } + bool isExpVM() const { return isImmTy(ImmTyExpVM); } + bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } bool isOffen() const { return isImmTy(ImmTyOffen); } bool isIdxen() const { return isImmTy(ImmTyIdxen); } bool isAddr64() const { return isImmTy(ImmTyAddr64); } @@ -458,6 +462,8 @@ case ImmTyDA: OS << "DA"; break; case ImmTyR128: OS << "R128"; break; case ImmTyLWE: OS << "LWE"; break; + case ImmTyExpCompr: OS << "ExpCompr"; break; + case ImmTyExpVM: OS << "ExpVM"; break; case ImmTyHwreg: OS << "Hwreg"; break; case ImmTySendMsg: OS << "SendMsg"; break; } @@ -701,6 +707,8 @@ AMDGPUOperand::Ptr defaultDA() const; AMDGPUOperand::Ptr defaultR128() const; AMDGPUOperand::Ptr defaultLWE() const; + AMDGPUOperand::Ptr defaultExpCompr() const; + AMDGPUOperand::Ptr defaultExpVM() const; AMDGPUOperand::Ptr defaultSMRDOffset() const; AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; @@ -2262,6 +2270,14 @@ return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyLWE); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultExpCompr() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyExpCompr); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultExpVM() const { + return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyExpVM); +} + //===----------------------------------------------------------------------===// // smrd //===----------------------------------------------------------------------===// Index: 
lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -60,7 +60,9 @@ void printDA(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printR128(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printLWE(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printRegOperand(unsigned RegNo, raw_ostream &O); + void printExpCompr(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printExpVM(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printVOPDst(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printImmediate32(uint32_t I, raw_ostream &O); void printImmediate64(uint64_t I, raw_ostream &O); @@ -76,6 +78,15 @@ void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSDWASrc1Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSDWADstUnused(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + template <unsigned N> + void printExpSrcN(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printExpSrc0(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printExpSrc1(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printExpSrc2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printExpSrc3(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printExpTgt(const MCInst *MI, unsigned OpNo, raw_ostream &O); + static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -179,6 +179,18 @@ printNamedBit(MI, OpNo, O, "lwe"); } +void 
AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " compr"; +} + +void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " vm"; +} + void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O, const MCRegisterInfo &MRI) { switch (reg) { @@ -552,6 +564,61 @@ } } +template <unsigned N> +void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned En = MI->getOperand(MI->getNumOperands() - 1).getImm(); + + // FIXME: What do we do with compr? The meaning of en changes depending on if + // compr is set. + + if (En & (1 << N)) + printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI); + else + O << "off"; +} + +void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printExpSrcN<0>(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printExpSrcN<1>(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printExpSrcN<2>(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printExpSrcN<3>(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + // This is really a 6 bit field. + uint32_t Tgt = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1); + + if (Tgt <= 7) + O << " mrt" << Tgt; + else if (Tgt == 8) + O << " z"; + else if (Tgt == 9) + O << " null"; + else if (Tgt == 10 || Tgt == 11) { + // Reserved values? Should we error here? 
+ } else if (Tgt >= 12 && Tgt <= 15) + O << " position" << Tgt - 12; + else if (Tgt >= 32) { + // Tgt is 6 bits, so 32..63 here; reserved 16..31 print nothing. + O << " param" << Tgt - 32; + } +} + void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1999,9 +1999,9 @@ const SDValue Ops[] = { Chain, DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), - DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i32), // TODO: i1 + DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1), DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), - DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i32), // TODO: i1 + DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i1), Op.getOperand(7), // src0 Op.getOperand(8), // src1 Op.getOperand(9), // src2 Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -557,6 +557,8 @@ def da : NamedOperandBit<"DA", NamedMatchClass<"DA">>; def r128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>; def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>; +def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>; +def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>; def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>; @@ -572,6 +574,10 @@ def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>; +def exp_tgt : Operand<i8> { + let PrintMethod = "printExpTgt"; +} + } // End OperandType = "OPERAND_IMMEDIATE" @@ -2521,12 +2527,13 @@ // Export opcodes //===----------------------------------------------------------------------===// +// en operand kept at end to easily find it from srcN printers. 
class EXP_Helper : EXPCommon< (outs), - (ins i8imm:$en, i8imm:$tgt, i32imm:$compr, i32imm:$vm, - VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3), - "exp $en, $tgt, $compr, "#!if(done, "1", "0")#", $vm, $src0, $src1, $src2, $src3", - [(node (i8 timm:$en), (i32 timm:$vm), (i8 timm:$tgt), (i32 timm:$compr), + (ins exp_tgt:$tgt, exp_vm:$vm, + ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3, exp_compr:$compr, i8imm:$en), + "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", + [(node (i8 timm:$en), (i1 timm:$vm), (i8 timm:$tgt), (i1 timm:$compr), f32:$src0, f32:$src1, f32:$src2, f32:$src3)] >; Index: lib/Target/AMDGPU/SILowerControlFlow.cpp =================================================================== --- lib/Target/AMDGPU/SILowerControlFlow.cpp +++ lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -229,14 +229,14 @@ // Exec mask is zero: Export to NULL target... BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE)) - .addImm(0) - .addImm(0x09) // V_008DFC_SQ_EXP_NULL - .addImm(0) - .addImm(1) + .addImm(0x09) // tgt V_008DFC_SQ_EXP_NULL + .addImm(1) // vm .addReg(AMDGPU::VGPR0, RegState::Undef) .addReg(AMDGPU::VGPR0, RegState::Undef) .addReg(AMDGPU::VGPR0, RegState::Undef) - .addReg(AMDGPU::VGPR0, RegState::Undef); + .addReg(AMDGPU::VGPR0, RegState::Undef) + .addImm(0) // compr + .addImm(0); // en // ... and terminate wavefront. BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); Index: lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.td +++ lib/Target/AMDGPU/SIRegisterInfo.td @@ -421,3 +421,24 @@ let OperandType = "OPERAND_REG_INLINE_C"; let ParserMatchClass = RegImmMatcher<"SCSrc64">; } + +// ===----------------------------------------------------------------------===// +// ExpSrc* Special cases for exp src operands which are printed as +// "off" depending on en operand. 
+// ===----------------------------------------------------------------------===// + +def ExpSrc0 : RegisterOperand<VGPR_32> { + let PrintMethod = "printExpSrc0"; +} + +def ExpSrc1 : RegisterOperand<VGPR_32> { + let PrintMethod = "printExpSrc1"; +} + +def ExpSrc2 : RegisterOperand<VGPR_32> { + let PrintMethod = "printExpSrc2"; +} + +def ExpSrc3 : RegisterOperand<VGPR_32> { + let PrintMethod = "printExpSrc3"; +} Index: test/CodeGen/AMDGPU/llvm.SI.export.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.SI.export.ll @@ -0,0 +1,237 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #0 + +; GCN-LABEL: {{^}}test_export_zeroes: +; GCN: exp mrt0 off, off, off, off{{$}} +; GCN: exp mrt0 off, off, off, off done{{$}} +define void @test_export_zeroes() #0 { + + call void @llvm.SI.export(i32 0, i32 0, i32 0, i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0) + call void @llvm.SI.export(i32 0, i32 0, i32 1, i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0) + ret void +} + +; FIXME: Should not set up registers for the unused source registers. 
+ +; GCN-LABEL: {{^}}test_export_en_src0: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}} +define void @test_export_en_src0() #0 { + call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src1: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}} +define void @test_export_en_src1() #0 { + call void @llvm.SI.export(i32 2, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src2: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}} +define void @test_export_en_src2() #0 { + call void @llvm.SI.export(i32 4, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src3: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}} +define void @test_export_en_src3() #0 { + call void @llvm.SI.export(i32 8, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src0_src1: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: 
v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}} +define void @test_export_en_src0_src1() #0 { + call void @llvm.SI.export(i32 3, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src0_src2: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}} +define void @test_export_en_src0_src2() #0 { + call void @llvm.SI.export(i32 5, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src0_src3: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}} +; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}} +define void @test_export_en_src0_src3() #0 { + call void @llvm.SI.export(i32 9, i32 0, i32 0, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 9, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_en_src0_src1_src2_src3() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void 
+} + +; GCN-LABEL: {{^}}test_export_mrt7: +; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0.5 +; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}} +; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}} +define void @test_export_mrt7() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 7, i32 0, float 0.5, float 0.5, float 0.5, float 0.5) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 7, i32 0, float 0.5, float 0.5, float 0.5, float 0.5) + ret void +} + +; GCN-LABEL: {{^}}test_export_z: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp z [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp z [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_z() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 8, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 8, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_null: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_null() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 9, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 9, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_reserved10: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp [[SRC0]], 
[[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_reserved10() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 10, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 10, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_reserved11: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_reserved11() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 11, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 11, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_position0: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp position0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp position0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_position0() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_position3: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp position3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp position3 [[SRC0]], [[SRC1]], 
[[SRC2]], [[SRC3]] done{{$}} +define void @test_export_position3() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 15, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 15, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_param0: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_param0() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 32, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_param31: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_param31() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 63, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_vm: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}} +; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}} +define void @test_export_vm() #0 { + call void 
@llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +attributes #0 = { nounwind "ShaderType"="0" } Index: test/CodeGen/AMDGPU/ret.ll =================================================================== --- test/CodeGen/AMDGPU/ret.ll +++ test/CodeGen/AMDGPU/ret.ll @@ -6,7 +6,7 @@ ; GCN-LABEL: {{^}}vgpr: ; GCN: v_mov_b32_e32 v1, v0 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1 -; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1 +; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm ; GCN: s_waitcnt expcnt(0) ; GCN-NOT: s_endpgm define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) { @@ -18,7 +18,7 @@ } ; GCN-LABEL: {{^}}vgpr_literal: -; GCN: exp 15, 0, 1, 1, 1, v0, v0, v0, v0 +; GCN: exp mrt0 v0, v0, v0, v0 done compr vm ; GCN: s_waitcnt expcnt(0) ; GCN-DAG: v_mov_b32_e32 v0, 1.0 ; GCN-DAG: v_mov_b32_e32 v1, 2.0 @@ -42,7 +42,6 @@ ; GCN: v_mov_b32_e32 v3, v4 ; GCN: v_mov_b32_e32 v4, v6 ; GCN-NOT: s_endpgm -attributes #0 = { "InitialPSInputAddr"="0" } define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { %i0 = extractelement <2 x i32> %4, i32 0 %i1 = extractelement <2 x i32> %4, i32 1 @@ -208,7 +207,7 @@ ; GCN-LABEL: {{^}}both: ; GCN: v_mov_b32_e32 v1, v0 -; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1 +; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1 ; GCN-DAG: s_add_i32 s0, s3, 2 ; GCN-DAG: s_mov_b32 s1, s2 @@ -229,7 +228,7 @@ ; GCN-LABEL: {{^}}structure_literal: -; GCN: exp 15, 0, 1, 1, 1, v0, v0, v0, v0 +; GCN: exp mrt0 v0, v0, v0, v0 done compr vm ; GCN: s_waitcnt expcnt(0) ; GCN-DAG: v_mov_b32_e32 v0, 1.0 ; GCN-DAG: 
s_mov_b32 s0, 2 @@ -240,3 +239,5 @@ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3) ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> }} } + +attributes #0 = { nounwind "InitialPSInputAddr"="0" } Index: test/CodeGen/AMDGPU/skip-if-dead.ll =================================================================== --- test/CodeGen/AMDGPU/skip-if-dead.ll +++ test/CodeGen/AMDGPU/skip-if-dead.ll @@ -106,7 +106,7 @@ ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7 ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]] ; CHECK-NEXT: ; BB#3: -; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0 +; CHECK-NEXT: exp null off, off, off, off done vm ; CHECK-NEXT: s_endpgm ; CHECK-NEXT: {{^}}[[SPLIT_BB]]: @@ -157,7 +157,7 @@ ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]] ; CHECK-NEXT: ; BB#4: -; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0 +; CHECK-NEXT: exp null off, off, off, off done vm ; CHECK-NEXT: s_endpgm ; CHECK-NEXT: {{^}}[[SPLIT_BB]]: