Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -135,6 +135,8 @@ ImmTyDA, ImmTyR128, ImmTyLWE, + ImmTyExpCompr, + ImmTyExpVM, ImmTyHwreg, ImmTySendMsg, }; @@ -228,6 +230,8 @@ bool isDA() const { return isImmTy(ImmTyDA); } bool isR128() const { return isImmTy(ImmTyUNorm); } bool isLWE() const { return isImmTy(ImmTyLWE); } + bool isExpVM() const { return isImmTy(ImmTyExpVM); } + bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } bool isOffen() const { return isImmTy(ImmTyOffen); } bool isIdxen() const { return isImmTy(ImmTyIdxen); } bool isAddr64() const { return isImmTy(ImmTyAddr64); } @@ -484,6 +488,8 @@ case ImmTyDA: OS << "DA"; break; case ImmTyR128: OS << "R128"; break; case ImmTyLWE: OS << "LWE"; break; + case ImmTyExpCompr: OS << "ExpCompr"; break; + case ImmTyExpVM: OS << "ExpVM"; break; case ImmTyHwreg: OS << "Hwreg"; break; case ImmTySendMsg: OS << "SendMsg"; break; } @@ -745,6 +751,8 @@ AMDGPUOperand::Ptr defaultSMRDOffset8() const; AMDGPUOperand::Ptr defaultSMRDOffset20() const; AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; + AMDGPUOperand::Ptr defaultExpCompr() const; + AMDGPUOperand::Ptr defaultExpVM() const; OperandMatchResultTy parseOModOperand(OperandVector &Operands); @@ -2532,6 +2540,14 @@ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultExpCompr() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyExpCompr); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultExpVM() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyExpVM); +} + //===----------------------------------------------------------------------===// // smrd //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -78,8 +78,13 @@ raw_ostream &O); void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printLWE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, - raw_ostream &O); + void printLWE(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExpCompr(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExpVM(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printRegOperand(unsigned RegNo, raw_ostream &O); void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); @@ -116,6 +121,22 @@ const MCSubtargetInfo &STI, raw_ostream &O); void printMemOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + + + template + void printExpSrcN(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExpSrc0(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExpSrc1(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExpSrc2(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExpSrc3(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExpTgt(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm, StringRef Default = ""); static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -195,6 +195,20 @@ printNamedBit(MI, OpNo, O, "lwe"); } +void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " compr"; +} + +void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " vm"; +} + void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI) { switch (RegNo) { @@ -599,10 +613,72 @@ } } -void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNo, +template +void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + int EnIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::en); + unsigned En = MI->getOperand(EnIdx).getImm(); + + // FIXME: What do we do with compr? The meaning of en changes depending on if + // compr is set. + + if (En & (1 << N)) + printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI); + else + O << "off"; +} + +void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + printExpSrcN<0>(MI, OpNo, STI, O); +} + +void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + printExpSrcN<1>(MI, OpNo, STI, O); +} + +void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + printExpSrcN<2>(MI, OpNo, STI, O); +} + +void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + printExpSrcN<3>(MI, OpNo, STI, O); +} + +void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + // This is really a 6 bit field. + uint32_t Tgt = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1); + + if (Tgt <= 7) + O << " mrt" << Tgt; + else if (Tgt == 8) + O << " mrtz"; + else if (Tgt == 9) + O << " null"; + else if (Tgt >= 12 && Tgt <= 15) + O << " pos" << Tgt - 12; + else if (Tgt >= 32 && Tgt <= 63) + O << " param" << Tgt - 32; + else { + // Reserved values 10, 11 + O << " invalid_target_" << Tgt; + } +} + +void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { - unsigned Imm = MI->getOperand(OpNo).getImm(); + unsigned Imm = MI->getOperand(OpNum).getImm(); if (Imm == 2) { O << "P0"; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -2705,9 +2705,9 @@ const SDValue Ops[] = { Chain, DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), - DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i32), // TODO: i1 + DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1), DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), - DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i32), // TODO: i1 + DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i1), Op.getOperand(7), // src0 Op.getOperand(8), // src1 Op.getOperand(9), // src2 Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -385,6 +385,8 @@ def da : NamedOperandBit<"DA", NamedMatchClass<"DA">>; def r128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>; def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>; +def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>; +def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>; def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>; @@ -400,6 +402,10 @@ def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>; +def exp_tgt : Operand { + let PrintMethod = "printExpTgt"; +} + } // End OperandType = "OPERAND_IMMEDIATE" @@ -520,10 +526,11 @@ class EXP_Helper : EXPCommon< (outs), - (ins i8imm:$tgt, VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3, - i32imm:$vm, i32imm:$compr, i8imm:$en), - "exp $en, $tgt, $compr, "#!if(done, "1", "0")#", $vm, $src0, $src1, $src2, $src3", - [(node (i8 timm:$en), (i32 timm:$vm), (i8 timm:$tgt), (i32 timm:$compr), + (ins exp_tgt:$tgt, + ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3, + exp_vm:$vm, exp_compr:$compr, i8imm:$en), + "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", + [(node (i8 timm:$en), (i1 timm:$vm), (i8 timm:$tgt), (i1 timm:$compr), f32:$src0, f32:$src1, f32:$src2, f32:$src3)] >; Index: lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.td +++ lib/Target/AMDGPU/SIRegisterInfo.td @@ -431,3 +431,24 @@ //===----------------------------------------------------------------------===// defm VCSrc : RegInlineOperand<"VS", "VCSrc">; + +// ===----------------------------------------------------------------------===// +// ExpSrc* Special cases for exp src operands which are printed as +// "off" depending on en operand. +// ===----------------------------------------------------------------------===// + +def ExpSrc0 : RegisterOperand { + let PrintMethod = "printExpSrc0"; +} + +def ExpSrc1 : RegisterOperand { + let PrintMethod = "printExpSrc1"; +} + +def ExpSrc2 : RegisterOperand { + let PrintMethod = "printExpSrc2"; +} + +def ExpSrc3 : RegisterOperand { + let PrintMethod = "printExpSrc3"; +} Index: test/CodeGen/AMDGPU/llvm.SI.export.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.SI.export.ll @@ -0,0 +1,237 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #0 + +; GCN-LABEL: {{^}}test_export_zeroes: +; GCN: exp mrt0 off, off, off, off{{$}} +; GCN: exp mrt0 off, off, off, off done{{$}} +define void @test_export_zeroes() #0 { + + call void @llvm.SI.export(i32 0, i32 0, i32 0, i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0) + call void @llvm.SI.export(i32 0, i32 0, i32 1, i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0) + ret void +} + +; FIXME: Should not set up registers for the unused source registers. + +; GCN-LABEL: {{^}}test_export_en_src0: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}} +define void @test_export_en_src0() #0 { + call void @llvm.SI.export(i32 1, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src1: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}} +define void @test_export_en_src1() #0 { + call void @llvm.SI.export(i32 2, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src2: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}} +define void @test_export_en_src2() #0 { + call void @llvm.SI.export(i32 4, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src3: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}} +define void @test_export_en_src3() #0 { + call void @llvm.SI.export(i32 8, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src0_src1: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}} +define void @test_export_en_src0_src1() #0 { + call void @llvm.SI.export(i32 3, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src0_src2: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}} +define void @test_export_en_src0_src2() #0 { + call void @llvm.SI.export(i32 5, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src0_src3: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}} +; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}} +define void @test_export_en_src0_src3() #0 { + call void @llvm.SI.export(i32 9, i32 0, i32 0, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 9, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_en_src0_src1_src2_src3() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_mrt7: +; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0.5 +; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}} +; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}} +define void @test_export_mrt7() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 7, i32 0, float 0.5, float 0.5, float 0.5, float 0.5) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 7, i32 0, float 0.5, float 0.5, float 0.5, float 0.5) + ret void +} + +; GCN-LABEL: {{^}}test_export_z: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_z() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 8, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 8, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_null: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_null() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 9, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 9, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_reserved10: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_reserved10() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 10, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 10, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_reserved11: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_reserved11() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 11, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 11, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_pos0: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_pos0() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_pos3: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_pos3() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 15, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 15, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_param0: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_param0() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 32, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_param31: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +define void @test_export_param31() #0 { + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 63, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +; GCN-LABEL: {{^}}test_export_vm: +; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}} +; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}} +define void @test_export_vm() #0 { + call void @llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 1.0, float 2.0, float 0.5, float 4.0) + ret void +} + +attributes #0 = { nounwind "ShaderType"="0" } Index: test/CodeGen/AMDGPU/ret.ll =================================================================== --- test/CodeGen/AMDGPU/ret.ll +++ test/CodeGen/AMDGPU/ret.ll @@ -6,7 +6,7 @@ ; GCN-LABEL: {{^}}vgpr: ; GCN: v_mov_b32_e32 v1, v0 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1 -; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1 +; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm ; GCN: s_waitcnt expcnt(0) ; GCN-NOT: s_endpgm define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) { @@ -19,7 +19,8 @@ ; GCN-LABEL: {{^}}vgpr_literal: ; GCN: v_mov_b32_e32 v4, v0 -; GCN: exp 15, 0, 1, 1, 1, v4, v4, v4, v4 +; GCN: exp mrt0 v4, v4, v4, v4 done compr vm + ; GCN-DAG: v_mov_b32_e32 v0, 1.0 ; GCN-DAG: v_mov_b32_e32 v1, 2.0 ; GCN-DAG: v_mov_b32_e32 v2, 4.0 @@ -43,7 +44,6 @@ ; GCN: v_mov_b32_e32 v3, v4 ; GCN: v_mov_b32_e32 v4, v6 ; GCN-NOT: s_endpgm -attributes #0 = { "InitialPSInputAddr"="0" } define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { %i0 = extractelement <2 x i32> %4, i32 0 %i1 = extractelement <2 x i32> %4, i32 1 @@ -209,7 +209,7 @@ ; GCN-LABEL: {{^}}both: ; GCN: v_mov_b32_e32 v1, v0 -; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1 +; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1 ; GCN-DAG: s_add_i32 s0, s3, 2 ; GCN-DAG: s_mov_b32 s1, s2 @@ -231,7 +231,8 @@ ; GCN-LABEL: {{^}}structure_literal: ; GCN: v_mov_b32_e32 v3, v0 -; GCN: exp 15, 0, 1, 1, 1, v3, v3, v3, v3 +; GCN: exp mrt0 v3, v3, v3, v3 done compr vm + ; GCN-DAG: v_mov_b32_e32 v0, 1.0 ; GCN-DAG: s_mov_b32 s0, 2 ; GCN-DAG: s_mov_b32 s1, 3 @@ -242,3 +243,5 @@ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3) ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> }} } + +attributes #0 = { nounwind "InitialPSInputAddr"="0" } Index: test/CodeGen/AMDGPU/skip-if-dead.ll =================================================================== --- test/CodeGen/AMDGPU/skip-if-dead.ll +++ test/CodeGen/AMDGPU/skip-if-dead.ll @@ -106,7 +106,7 @@ ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7 ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]] ; CHECK-NEXT: ; BB#2: -; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0 +; CHECK-NEXT: exp null off, off, off, off done vm ; CHECK-NEXT: s_endpgm ; CHECK-NEXT: {{^}}[[SPLIT_BB]]: @@ -158,7 +158,7 @@ ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]] ; CHECK-NEXT: ; BB#2: -; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0 +; CHECK-NEXT: exp null off, off, off, off done vm ; CHECK-NEXT: s_endpgm ; CHECK-NEXT: {{^}}[[SPLIT_BB]]: