Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -8,23 +8,12 @@
 //===------------------------------------------------------------===//

 include "llvm/Target/Target.td"
+include "AMDGPUFeatures.td"

 //===------------------------------------------------------------===//
 // Subtarget Features (device properties)
 //===------------------------------------------------------------===//

-def FeatureFP64 : SubtargetFeature<"fp64",
-  "FP64",
-  "true",
-  "Enable double precision operations"
->;
-
-def FeatureFMA : SubtargetFeature<"fmaf",
-  "FMA",
-  "true",
-  "Enable single precision FMA (not as fast as mul+add, but fused)"
->;
-
 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
   "FastFMAF32",
   "true",
@@ -43,30 +32,6 @@
   "Most fp64 instructions are half rate instead of quarter"
 >;

-def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
-  "R600ALUInst",
-  "false",
-  "Older version of ALU instructions encoding"
->;
-
-def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
-  "HasVertexCache",
-  "true",
-  "Specify use of dedicated vertex cache"
->;
-
-def FeatureCaymanISA : SubtargetFeature<"caymanISA",
-  "CaymanISA",
-  "true",
-  "Use Cayman ISA"
->;
-
-def FeatureCFALUBug : SubtargetFeature<"cfalubug",
-  "CFALUBug",
-  "true",
-  "GPU has CF_ALU bug"
->;
-
 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
   "FlatAddressSpace",
   "true",
@@ -152,27 +117,6 @@
   "VI SGPR initialization bug requiring a fixed SGPR allocation size"
 >;

-class SubtargetFeatureFetchLimit <string Value> :
-  SubtargetFeature <"fetch"#Value,
-  "TexVTXClauseSize",
-  Value,
-  "Limit the maximum number of fetches in a clause to "#Value
->;
-
-def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
-def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
-
-class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
-  "wavefrontsize"#Value,
-  "WavefrontSize",
-  !cast<string>(Value),
-  "The number of threads per wavefront"
->;
-
-def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
-def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
-def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
-
 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
   "ldsbankcount"#Value,
   "LDSBankCount",
@@ -183,19 +127,6 @@
 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

-class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
-  "localmemorysize"#Value,
-  "LocalMemorySize",
-  !cast<string>(Value),
-  "The size of local memory in bytes"
->;
-
-def FeatureGCN : SubtargetFeature<"gcn",
-  "IsGCN",
-  "true",
-  "GCN or newer GPU"
->;
-
 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
   "GCN3Encoding",
   "true",
@@ -368,12 +299,6 @@
   [FeatureFP64FP16Denormals]
 >;

-def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
-  "DX10Clamp",
-  "true",
-  "clamp modifier clamps NaNs to 0.0"
->;
-
 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
   "FPExceptions",
   "true",
@@ -416,12 +341,6 @@
   "Dump MachineInstrs in the CodeEmitter"
 >;

-def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
-  "EnablePromoteAlloca",
-  "true",
-  "Enable promote alloca pass"
->;
-
 // XXX - This should probably be removed once enabled by default
 def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
   "EnableLoadStoreOpt",
@@ -485,45 +404,29 @@
   "Dummy feature to disable assembler instructions"
 >;

-class SubtargetFeatureGeneration <string Value,
-                                  list<SubtargetFeature> Implies> :
SubtargetFeature ; - -def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>; -def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>; -def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; - -def FeatureR600 : SubtargetFeatureGeneration<"R600", - [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0] ->; - -def FeatureR700 : SubtargetFeatureGeneration<"R700", - [FeatureFetchLimit16, FeatureLocalMemorySize0] ->; - -def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN", - [FeatureFetchLimit16, FeatureLocalMemorySize32768] +def FeatureGCN : SubtargetFeature<"gcn", + "IsGCN", + "true", + "GCN or newer GPU" >; -def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", - [FeatureFetchLimit16, FeatureWavefrontSize64, - FeatureLocalMemorySize32768] ->; +class AMDGPUSubtargetFeatureGeneration Implies> : + SubtargetFeatureGeneration ; -def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", +def FeatureSouthernIslands : AMDGPUSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureGCN, FeatureLDSBankCount32, FeatureMovrel] >; -def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", +def FeatureSeaIslands : AMDGPUSubtargetFeatureGeneration<"SEA_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, FeatureCIInsts, FeatureMovrel] >; -def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", +def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, @@ -534,7 +437,7 @@ ] >; -def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", +def FeatureGFX9 : AMDGPUSubtargetFeatureGeneration<"GFX9", [FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, @@ -744,8 +647,6 @@ // Predicate helper class //===----------------------------------------------------------------------===// -def TruePredicate : Predicate<"true">; - def isSICI : Predicate< "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" @@ -837,36 +738,15 @@ def EnableLateCFGStructurize : Predicate< "EnableLateStructurizeCFG">; -// Exists to help track down where SubtargetPredicate isn't set rather -// than letting tablegen crash with an unhelpful error. 
-def InvalidPred : Predicate<"predicate not set on instruction or pattern">; - -class PredicateControl { - Predicate SubtargetPredicate = InvalidPred; - Predicate SIAssemblerPredicate = isSICI; - Predicate VIAssemblerPredicate = isVI; - list AssemblerPredicates = []; - Predicate AssemblerPredicate = TruePredicate; - list OtherPredicates = []; - list Predicates = !listconcat([SubtargetPredicate, - AssemblerPredicate], - AssemblerPredicates, - OtherPredicates); -} - -class AMDGPUPat : Pat, - PredicateControl; - - // Include AMDGPU TD files -include "R600Schedule.td" -include "R600Processors.td" include "SISchedule.td" include "GCNProcessors.td" include "AMDGPUInstrInfo.td" include "AMDGPUIntrinsics.td" +include "SIIntrinsics.td" include "AMDGPURegisterInfo.td" include "AMDGPURegisterBanks.td" include "AMDGPUInstructions.td" +include "SIInstrInfo.td" include "AMDGPUCallingConv.td" include "AMDGPUSearchableTables.td" Index: lib/Target/AMDGPU/AMDGPUCallingConv.td =================================================================== --- lib/Target/AMDGPU/AMDGPUCallingConv.td +++ lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -85,17 +85,6 @@ ]>> ]>; -// Calling convention for R600 -def CC_R600 : CallingConv<[ - CCIfInReg>> -]>; - // Calling convention for compute kernels def CC_AMDGPU_Kernel : CallingConv<[ CCCustom<"allocateKernArg"> @@ -165,9 +154,5 @@ CCIf<"static_cast" "(State.getMachineFunction().getSubtarget()).getGeneration() >= " "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C", - CCDelegateTo>, - CCIf<"static_cast" - "(State.getMachineFunction().getSubtarget()).getGeneration() < " - "AMDGPUSubtarget::SOUTHERN_ISLANDS", - CCDelegateTo> + CCDelegateTo> ]>; Index: lib/Target/AMDGPU/AMDGPUFeatures.td =================================================================== --- /dev/null +++ lib/Target/AMDGPU/AMDGPUFeatures.td @@ -0,0 +1,52 @@ + +def FeatureFP64 : SubtargetFeature<"fp64", + "FP64", + "true", + "Enable double precision operations" +>; + +def FeatureFMA : SubtargetFeature<"fmaf", + "FMA", + "true", + "Enable single precision FMA (not as fast as mul+add, but fused)" +>; + +class SubtargetFeatureLocalMemorySize : SubtargetFeature< + "localmemorysize"#Value, + "LocalMemorySize", + !cast(Value), + "The size of local memory in bytes" +>; + +def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>; +def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>; +def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; + +class SubtargetFeatureWavefrontSize : SubtargetFeature< + "wavefrontsize"#Value, + "WavefrontSize", + !cast(Value), + "The number of threads per wavefront" +>; + +def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>; +def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>; +def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>; + +class SubtargetFeatureGeneration Implies> : + SubtargetFeature ; + +def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp", + "DX10Clamp", + "true", + "clamp modifier clamps NaNs to 0.0" +>; + +def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca", + "EnablePromoteAlloca", + "true", + "Enable promote alloca pass" +>; + Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -104,15 +104,11 @@ bool isNoNanSrc(SDValue N) const; bool isInlineImmediate(const SDNode *N) const; - bool isConstantLoad(const 
MemSDNode *N, int cbID) const; bool isUniformBr(const SDNode *N) const; SDNode *glueCopyToM0(SDNode *N) const; const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; - bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); - bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, - SDValue& Offset); virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, @@ -227,9 +223,18 @@ }; class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { + const R600Subtarget *Subtarget; + AMDGPUAS AMDGPUASI; + + bool isConstantLoad(const MemSDNode *N, int cbID) const; + bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); + bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, + SDValue& Offset); public: explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) : - AMDGPUDAGToDAGISel(TM, OptLevel) {} + AMDGPUDAGToDAGISel(TM, OptLevel) { + AMDGPUASI = AMDGPU::getAMDGPUAS(*TM); + } void Select(SDNode *N) override; @@ -237,6 +242,11 @@ SDValue &Offset) override; bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset) override; + + bool runOnMachineFunction(MachineFunction &MF) override; +protected: + // Include the pieces autogenerated from the target description. +#include "R600GenDAGISel.inc" }; } // end anonymous namespace @@ -280,8 +290,7 @@ } bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { - const SIInstrInfo *TII - = static_cast(Subtarget)->getInstrInfo(); + const SIInstrInfo *TII = Subtarget->getInstrInfo(); if (const ConstantSDNode *C = dyn_cast(N)) return TII->isInlineConstant(C->getAPIntValue()); @@ -636,16 +645,6 @@ SelectCode(N); } -bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { - if (!N->readMem()) - return false; - if (CbId == -1) - return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || - N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT; - - return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; -} - bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); const Instruction *Term = BB->getTerminator(); @@ -661,26 +660,6 @@ // Complex Patterns //===----------------------------------------------------------------------===// -bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, - SDValue& IntPtr) { - if (ConstantSDNode *Cst = dyn_cast(Addr)) { - IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), - true); - return true; - } - return false; -} - -bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, - SDValue& BaseReg, SDValue &Offset) { - if (!isa(Addr)) { - BaseReg = Addr; - Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); - return true; - } - return false; -} - bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset) { return false; @@ -692,11 +671,11 @@ SDLoc DL(Addr); if ((C = dyn_cast(Addr))) { - Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && (C = dyn_cast(Addr.getOperand(0)))) { - Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Base = 
CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && (C = dyn_cast(Addr.getOperand(1)))) { @@ -2159,6 +2138,41 @@ } while (IsModified); } +bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + Subtarget = &MF.getSubtarget(); + return SelectionDAGISel::runOnMachineFunction(MF); +} + +bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { + if (!N->readMem()) + return false; + if (CbId == -1) + return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || + N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT; + + return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; +} + +bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, + SDValue& IntPtr) { + if (ConstantSDNode *Cst = dyn_cast(Addr)) { + IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), + true); + return true; + } + return false; +} + +bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, + SDValue& BaseReg, SDValue &Offset) { + if (!isa(Addr)) { + BaseReg = Addr; + Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); + return true; + } + return false; +} + void R600DAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { @@ -2179,12 +2193,12 @@ // pass. We want to avoid 128 bits copies as much as possible because they // can't be bundled by our scheduler. switch(NumVectorElts) { - case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; + case 2: RegClassID = R600::R600_Reg64RegClassID; break; case 4: if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) - RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; + RegClassID = R600::R600_Reg128VerticalRegClassID; else - RegClassID = AMDGPU::R600_Reg128RegClassID; + RegClassID = R600::R600_Reg128RegClassID; break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } @@ -2202,11 +2216,11 @@ SDLoc DL(Addr); if ((C = dyn_cast(Addr))) { - Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && (C = dyn_cast(Addr.getOperand(0)))) { - Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && (C = dyn_cast(Addr.getOperand(1)))) { @@ -2237,7 +2251,7 @@ && isInt<16>(IMMOffset->getZExtValue())) { Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(CurDAG->getEntryNode()), - AMDGPU::ZERO, MVT::i32); + R600::ZERO, MVT::i32); Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), MVT::i32); return true; Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -23,11 +23,13 @@ namespace llvm { class AMDGPUMachineFunction; -class AMDGPUSubtarget; +class AMDGPUCommonSubtarget; struct ArgDescriptor; class AMDGPUTargetLowering : public TargetLowering { private: + const AMDGPUCommonSubtarget *Subtarget; + /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been /// legalized from a smaller type VT. 
Need to match pre-legalized type because /// the generic legalization inserts the add/sub between the select and @@ -39,7 +41,6 @@ static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG); protected: - const AMDGPUSubtarget *Subtarget; AMDGPUAS AMDGPUASI; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; @@ -123,7 +124,7 @@ void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl &Ins) const; public: - AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); + AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUCommonSubtarget &STI); bool mayIgnoreSignedZero(SDValue Op) const { if (getTargetMachine().Options.NoSignedZerosFPMath) Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -151,7 +151,7 @@ } AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, - const AMDGPUSubtarget &STI) + const AMDGPUCommonSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { AMDGPUASI = AMDGPU::getAMDGPUAS(TM); // Lower floating point store/load to integer store/load to reduce the number @@ -326,10 +326,6 @@ setOperationAction(ISD::FLOG, MVT::f32, Custom); setOperationAction(ISD::FLOG10, MVT::f32, Custom); - if (Subtarget->has16BitInsts()) { - setOperationAction(ISD::FLOG, MVT::f16, Custom); - setOperationAction(ISD::FLOG10, MVT::f16, Custom); - } setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); @@ -337,10 +333,6 @@ setOperationAction(ISD::FREM, MVT::f32, Custom); setOperationAction(ISD::FREM, MVT::f64, Custom); - // v_mad_f32 does not support denormals according to some sources. - if (!Subtarget->hasFP32Denormals()) - setOperationAction(ISD::FMAD, MVT::f32, Legal); - // Expand to fneg + fadd. setOperationAction(ISD::FSUB, MVT::f64, Expand); @@ -355,19 +347,6 @@ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom); - if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { - setOperationAction(ISD::FCEIL, MVT::f64, Custom); - setOperationAction(ISD::FTRUNC, MVT::f64, Custom); - setOperationAction(ISD::FRINT, MVT::f64, Custom); - setOperationAction(ISD::FFLOOR, MVT::f64, Custom); - } - - if (!Subtarget->hasBFI()) { - // fcopysign can be done in a single instruction with BFI. - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - } - setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); @@ -399,12 +378,6 @@ setOperationAction(ISD::SUBE, VT, Legal); } - if (!Subtarget->hasBCNT(32)) - setOperationAction(ISD::CTPOP, MVT::i32, Expand); - - if (!Subtarget->hasBCNT(64)) - setOperationAction(ISD::CTPOP, MVT::i64, Expand); - // The hardware supports 32-bit ROTR, but not ROTL. 
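The comment just above notes that the hardware has a 32-bit ROTR but no ROTL, which is why ROTL is marked Expand while ROTR stays legal. A minimal standalone sketch (not part of the patch) of the identity that expansion relies on:

// Standalone illustration: rotate-left can always be rewritten in terms of
// rotate-right on a 32-bit value, so only ROTR needs native support.
#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t X, unsigned N) {
  N &= 31;
  return N ? (X >> N) | (X << (32 - N)) : X;
}

// rotl(x, n) == rotr(x, 32 - n) for n in [1, 31]; n == 0 is the identity.
static uint32_t rotl32(uint32_t X, unsigned N) {
  N &= 31;
  return N ? rotr32(X, 32 - N) : X;
}

int main() {
  assert(rotl32(0x80000001u, 1) == 0x00000003u);
  assert(rotl32(0x12345678u, 8) == rotr32(0x12345678u, 24));
  return 0;
}
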
setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); @@ -424,28 +397,11 @@ setOperationAction(ISD::SMAX, MVT::i32, Legal); setOperationAction(ISD::UMAX, MVT::i32, Legal); - if (Subtarget->hasFFBH()) - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); - - if (Subtarget->hasFFBL()) - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); - setOperationAction(ISD::CTTZ, MVT::i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom); setOperationAction(ISD::CTLZ, MVT::i64, Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); - // We only really have 32-bit BFE instructions (and 16-bit on VI). - // - // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any - // effort to match them now. We want this to be false for i64 cases when the - // extraction isn't restricted to the upper or lower half. Ideally we would - // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that - // span the midpoint are probably relatively rare, so don't worry about them - // for now. - if (Subtarget->hasBFE()) - setHasExtractBitsInsn(true); - static const MVT::SimpleValueType VectorIntTypes[] = { MVT::v2i32, MVT::v4i32 }; @@ -550,11 +506,6 @@ // vector compares until that is fixed. setHasMultipleConditionRegisters(true); - // SI at least has hardware support for floating point exceptions, but no way - // of using or handling them is implemented. They are also optional in OpenCL - // (Section 7.3) - setHasFloatingPointExceptions(Subtarget->hasFPExceptions()); - PredictableSelectIsExpensive = false; // We want to find all load dependencies for long chains of stores to enable @@ -776,7 +727,7 @@ { const LoadSDNode * L = dyn_cast(N); if (L->getMemOperand()->getAddrSpace() - == Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT) + == AMDGPUASI.CONSTANT_ADDRESS_32BIT) return true; return false; } @@ -4302,9 +4253,11 @@ switch (IID) { case Intrinsic::amdgcn_mbcnt_lo: case Intrinsic::amdgcn_mbcnt_hi: { + const SISubtarget &ST = + DAG.getMachineFunction().getSubtarget(); // These return at most the wavefront size - 1. unsigned Size = Op.getValueType().getSizeInBits(); - Known.Zero.setHighBits(Size - Subtarget->getWavefrontSizeLog2()); + Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2()); break; } default: Index: lib/Target/AMDGPU/AMDGPUInstrInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -20,10 +20,6 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#define GET_INSTRINFO_HEADER -#include "AMDGPUGenInstrInfo.inc" -#undef GET_INSTRINFO_HEADER - namespace llvm { class AMDGPUSubtarget; @@ -31,26 +27,10 @@ class MachineInstr; class MachineInstrBuilder; -class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { -private: - const AMDGPUSubtarget &ST; - - virtual void anchor(); -protected: - AMDGPUAS AMDGPUASI; - +class AMDGPUInstrInfo { public: explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); - bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, - int64_t Offset1, int64_t Offset2, - unsigned NumLoads) const override; - - /// Return a target-specific opcode if Opcode is a pseudo instruction. - /// Return -1 if the target-specific opcode for the pseudo instruction does - /// not exist. If Opcode is not a pseudo instruction, this is identity. 
- int pseudoToMCOpcode(int Opcode) const; - static bool isUniformMMO(const MachineMemOperand *MMO); }; Index: lib/Target/AMDGPU/AMDGPUInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -23,109 +23,7 @@ using namespace llvm; -#define GET_INSTRINFO_CTOR_DTOR -#include "AMDGPUGenInstrInfo.inc" - -namespace llvm { -namespace AMDGPU { -#define GET_RSRCINTRINSIC_IMPL -#include "AMDGPUGenSearchableTables.inc" - -#define GET_D16IMAGEDIMINTRINSIC_IMPL -#include "AMDGPUGenSearchableTables.inc" -} -} - -// Pin the vtable to this file. -void AMDGPUInstrInfo::anchor() {} - -AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) - : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), - ST(ST), - AMDGPUASI(ST.getAMDGPUAS()) {} - -// FIXME: This behaves strangely. If, for example, you have 32 load + stores, -// the first 16 loads will be interleaved with the stores, and the next 16 will -// be clustered as expected. It should really split into 2 16 store batches. -// -// Loads are clustered until this returns false, rather than trying to schedule -// groups of stores. This also means we have to deal with saying different -// address space loads should be clustered, and ones which might cause bank -// conflicts. -// -// This might be deprecated so it might not be worth that much effort to fix. -bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, - int64_t Offset0, int64_t Offset1, - unsigned NumLoads) const { - assert(Offset1 > Offset0 && - "Second offset should be larger than first offset!"); - // If we have less than 16 loads in a row, and the offsets are within 64 - // bytes, then schedule together. - - // A cacheline is 64 bytes (for global memory). - return (NumLoads <= 16 && (Offset1 - Offset0) < 64); -} - -// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td -enum SIEncodingFamily { - SI = 0, - VI = 1, - SDWA = 2, - SDWA9 = 3, - GFX80 = 4, - GFX9 = 5 -}; - -static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) { - switch (ST.getGeneration()) { - case AMDGPUSubtarget::SOUTHERN_ISLANDS: - case AMDGPUSubtarget::SEA_ISLANDS: - return SIEncodingFamily::SI; - case AMDGPUSubtarget::VOLCANIC_ISLANDS: - case AMDGPUSubtarget::GFX9: - return SIEncodingFamily::VI; - - // FIXME: This should never be called for r600 GPUs. - case AMDGPUSubtarget::R600: - case AMDGPUSubtarget::R700: - case AMDGPUSubtarget::EVERGREEN: - case AMDGPUSubtarget::NORTHERN_ISLANDS: - return SIEncodingFamily::SI; - } - - llvm_unreachable("Unknown subtarget generation!"); -} - -int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { - SIEncodingFamily Gen = subtargetEncodingFamily(ST); - - if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && - ST.getGeneration() >= AMDGPUSubtarget::GFX9) - Gen = SIEncodingFamily::GFX9; - - if (get(Opcode).TSFlags & SIInstrFlags::SDWA) - Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9 - : SIEncodingFamily::SDWA; - // Adjust the encoding family to GFX80 for D16 buffer instructions when the - // subtarget has UnpackedD16VMem feature. - // TODO: remove this when we discard GFX80 encoding. - if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16) - && !(get(Opcode).TSFlags & SIInstrFlags::MIMG)) - Gen = SIEncodingFamily::GFX80; - - int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); - - // -1 means that Opcode is already a native instruction. 
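The pseudoToMCOpcode logic removed above leans on two sentinels coming back from the TableGen-generated getMCOpcode lookup, as its comments describe. A simplified standalone model of just that sentinel handling; the opcodes, table, and enum here are made up for illustration and are not the real AMDGPU tables:

// Standalone model of the sentinel semantics documented in the patch:
// getMCOpcode() == -1 means "already a native instruction", while
// (uint16_t)-1 means "this pseudo has no encoding for the selected family".
#include <cassert>
#include <cstdint>

enum class EncodingFamily { SI = 0, VI = 1 };

// Hypothetical lookup; the real table is generated by TableGen.
static int getMCOpcodeModel(int Opcode, EncodingFamily Gen) {
  if (Opcode == 100)                          // a native instruction
    return -1;
  if (Opcode == 200)                          // pseudo encoded on VI only
    return Gen == EncodingFamily::VI ? 2000 : (uint16_t)-1;
  return (uint16_t)-1;
}

static int pseudoToMCOpcodeModel(int Opcode, EncodingFamily Gen) {
  int MCOp = getMCOpcodeModel(Opcode, Gen);
  if (MCOp == -1)
    return Opcode;            // already native: keep the opcode
  if (MCOp == (uint16_t)-1)
    return -1;                // no encoding in this generation
  return MCOp;
}

int main() {
  assert(pseudoToMCOpcodeModel(100, EncodingFamily::SI) == 100);
  assert(pseudoToMCOpcodeModel(200, EncodingFamily::VI) == 2000);
  assert(pseudoToMCOpcodeModel(200, EncodingFamily::SI) == -1);
  return 0;
}
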
- if (MCOp == -1) - return Opcode; - - // (uint16_t)-1 means that Opcode is a pseudo instruction that has - // no encoding in the given subtarget generation. - if (MCOp == (uint16_t)-1) - return -1; - - return MCOp; -} +AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) { } // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence. bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) { Index: lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructions.td +++ lib/Target/AMDGPU/AMDGPUInstructions.td @@ -42,6 +42,47 @@ field bits<32> Inst = 0xffffffff; } +//===---------------------------------------------------------------------===// +// Return instruction +//===---------------------------------------------------------------------===// + +class ILFormat pattern> +: Instruction { + + let Namespace = "AMDGPU"; + dag OutOperandList = outs; + dag InOperandList = ins; + let Pattern = pattern; + let AsmString = !strconcat(asmstr, "\n"); + let isPseudo = 1; + let Itinerary = NullALU; + bit hasIEEEFlag = 0; + bit hasZeroOpFlag = 0; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let isCodeGenOnly = 1; +} + +def TruePredicate : Predicate<"true">; + +// Exists to help track down where SubtargetPredicate isn't set rather +// than letting tablegen crash with an unhelpful error. +def InvalidPred : Predicate<"predicate not set on instruction or pattern">; + +class PredicateControl { + Predicate SubtargetPredicate = InvalidPred; + list AssemblerPredicates = []; + Predicate AssemblerPredicate = TruePredicate; + list OtherPredicates = []; + list Predicates = !listconcat([SubtargetPredicate, + AssemblerPredicate], + AssemblerPredicates, + OtherPredicates); +} +class AMDGPUPat : Pat, + PredicateControl; + def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">; def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">; def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">; @@ -94,12 +135,6 @@ // Misc. 
PatFrags //===----------------------------------------------------------------------===// -class HasOneUseUnaryOp : PatFrag< - (ops node:$src0), - (op $src0), - [{ return N->hasOneUse(); }] ->; - class HasOneUseBinOp : PatFrag< (ops node:$src0, node:$src1), (op $src0, $src1), @@ -112,8 +147,6 @@ [{ return N->hasOneUse(); }] >; -def trunc_oneuse : HasOneUseUnaryOp; - let Properties = [SDNPCommutative, SDNPAssociative] in { def smax_oneuse : HasOneUseBinOp; def smin_oneuse : HasOneUseBinOp; @@ -239,6 +272,37 @@ [{(void)N; return false;}] >; +//===----------------------------------------------------------------------===// +// PatLeafs for Texture Constants +//===----------------------------------------------------------------------===// + +def TEX_ARRAY : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 9 || TType == 10 || TType == 16; + }] +>; + +def TEX_RECT : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 5; + }] +>; + +def TEX_SHADOW : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return (TType >= 6 && TType <= 8) || TType == 13; + }] +>; + +def TEX_SHADOW_ARRAY : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 11 || TType == 12 || TType == 17; + }] +>; //===----------------------------------------------------------------------===// // Load/Store Pattern Fragments @@ -746,11 +810,3 @@ (AMDGPUrcp (fsqrt vt:$src)), (RsqInst $src) >; - -include "R600Instructions.td" -include "R700Instructions.td" -include "EvergreenInstructions.td" -include "CaymanInstructions.td" - -include "SIInstrInfo.td" - Index: lib/Target/AMDGPU/AMDGPUIntrinsics.td =================================================================== --- lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -14,5 +14,3 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; } - -include "SIIntrinsics.td" Index: lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -117,7 +117,6 @@ return false; const TargetMachine &TM = TPC->getTM(); - const AMDGPUSubtarget &ST = TM.getSubtarget(F); bool Changed = false; for (auto *U : F.users()) { @@ -125,7 +124,7 @@ if (!CI) continue; - Changed |= ST.makeLIDRangeMetadata(CI); + Changed |= AMDGPUCommonSubtarget::get(TM, F).makeLIDRangeMetadata(CI); } return Changed; } Index: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -152,7 +152,7 @@ IsAMDGCN = TT.getArch() == Triple::amdgcn; IsAMDHSA = TT.getOS() == Triple::AMDHSA; - const AMDGPUSubtarget &ST = TM->getSubtarget(F); + const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F); if (!ST.isPromoteAllocaEnabled()) return false; @@ -174,8 +174,8 @@ std::pair AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) { - const AMDGPUSubtarget &ST = TM->getSubtarget( - *Builder.GetInsertBlock()->getParent()); + const Function &F = *Builder.GetInsertBlock()->getParent(); + const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F); if (!IsAMDHSA) { Function *LocalSizeYFn @@ -261,8 +261,8 @@ } Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) { - const AMDGPUSubtarget &ST = 
TM->getSubtarget( - *Builder.GetInsertBlock()->getParent()); + const AMDGPUCommonSubtarget &ST = + AMDGPUCommonSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent()); Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic; switch (N) { @@ -602,7 +602,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) { FunctionType *FTy = F.getFunctionType(); - const AMDGPUSubtarget &ST = TM->getSubtarget(F); + const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F); // If the function has any arguments in the local address space, then it's // possible these arguments require the entire local memory space, so @@ -729,8 +729,7 @@ if (!SufficientLDS) return false; - const AMDGPUSubtarget &ST = - TM->getSubtarget(ContainingFunction); + const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, ContainingFunction); unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second; const DataLayout &DL = Mod->getDataLayout(); Index: lib/Target/AMDGPU/AMDGPURegisterInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPURegisterInfo.td +++ lib/Target/AMDGPU/AMDGPURegisterInfo.td @@ -19,5 +19,4 @@ } -include "R600RegisterInfo.td" include "SIRegisterInfo.td" Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -39,22 +39,181 @@ #define GET_SUBTARGETINFO_HEADER #include "AMDGPUGenSubtargetInfo.inc" +#define GET_SUBTARGETINFO_HEADER +#include "R600GenSubtargetInfo.inc" namespace llvm { class StringRef; -class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { +class AMDGPUCommonSubtarget { +private: + Triple TargetTriple; + +protected: + const FeatureBitset &SubtargetFeatureBits; + bool Has16BitInsts; + bool HasMadMixInsts; + bool FP32Denormals; + bool FPExceptions; + bool HasSDWA; + bool HasVOP3PInsts; + bool HasMulI24; + bool HasMulU24; + bool HasFminFmaxLegacy; + bool EnablePromoteAlloca; + int LocalMemorySize; + unsigned WavefrontSize; + +public: + AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits); + + static const AMDGPUCommonSubtarget &get(const MachineFunction &MF); + static const AMDGPUCommonSubtarget &get(const TargetMachine &TM, + const Function &F); + + /// \returns Default range flat work group size for a calling convention. + std::pair getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; + + /// \returns Subtarget's default pair of minimum/maximum flat work group sizes + /// for function \p F, or minimum/maximum flat work group sizes explicitly + /// requested using "amdgpu-flat-work-group-size" attribute attached to + /// function \p F. + /// + /// \returns Subtarget's default values if explicitly requested values cannot + /// be converted to integer, or violate subtarget's specifications. + std::pair getFlatWorkGroupSizes(const Function &F) const; + + /// \returns Subtarget's default pair of minimum/maximum number of waves per + /// execution unit for function \p F, or minimum/maximum number of waves per + /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute + /// attached to function \p F. + /// + /// \returns Subtarget's default values if explicitly requested values cannot + /// be converted to integer, violate subtarget's specifications, or are not + /// compatible with minimum/maximum number of waves limited by flat work group + /// size, register usage, and/or lds usage. 
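getFlatWorkGroupSizes, documented above, honours an explicitly requested "amdgpu-flat-work-group-size" range only when it parses and stays within what the subtarget supports, and otherwise falls back to the defaults. A standalone sketch of that fallback behaviour; the "min,max" string format, the helper name, and the concrete limits are assumptions for illustration:

// Standalone sketch of the documented fallback: an explicit request is used
// only if it converts to integers and respects the subtarget's limits.
#include <cstdio>
#include <string>
#include <utility>

using Range = std::pair<unsigned, unsigned>;

static Range getFlatWorkGroupSizesModel(const std::string &Attr,
                                        Range Default, Range HwLimit) {
  unsigned Min = 0, Max = 0;
  if (std::sscanf(Attr.c_str(), "%u,%u", &Min, &Max) != 2)
    return Default;                         // not convertible to integers
  if (Min > Max || Min < HwLimit.first || Max > HwLimit.second)
    return Default;                         // violates subtarget limits
  return {Min, Max};
}

int main() {
  const Range Default{1, 256}, Hw{1, 1024};
  // A valid request is used as-is; a malformed one falls back to the default.
  Range A = getFlatWorkGroupSizesModel("64,128", Default, Hw);
  Range B = getFlatWorkGroupSizesModel("not-a-number", Default, Hw);
  std::printf("A=[%u,%u] B=[%u,%u]\n", A.first, A.second, B.first, B.second);
  return 0;
}
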
+ std::pair getWavesPerEU(const Function &F) const; + + /// Return the amount of LDS that can be used that will not restrict the + /// occupancy lower than WaveCount. + unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, + const Function &) const; + + /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if + /// the given LDS memory size is the only constraint. + unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; + + unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; + + bool isAmdHsaOS() const { + return TargetTriple.getOS() == Triple::AMDHSA; + } + + bool isAmdPalOS() const { + return TargetTriple.getOS() == Triple::AMDPAL; + } + + bool has16BitInsts() const { + return Has16BitInsts; + } + + bool hasMadMixInsts() const { + return HasMadMixInsts; + } + + bool hasFP32Denormals() const { + return FP32Denormals; + } + + bool hasFPExceptions() const { + return FPExceptions; + } + + bool hasSDWA() const { + return HasSDWA; + } + + bool hasVOP3PInsts() const { + return HasVOP3PInsts; + } + + bool hasMulI24() const { + return HasMulI24; + } + + bool hasMulU24() const { + return HasMulU24; + } + + bool hasFminFmaxLegacy() const { + return HasFminFmaxLegacy; + } + + bool isPromoteAllocaEnabled() const { + return EnablePromoteAlloca; + } + + unsigned getWavefrontSize() const { + return WavefrontSize; + } + + int getLocalMemorySize() const { + return LocalMemorySize; + } + + unsigned getAlignmentForImplicitArgPtr() const { + return isAmdHsaOS() ? 8 : 4; + } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits, + FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits); + } + + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { + return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits, + FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. + unsigned getMinWavesPerEU() const { + return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits); + } + + unsigned getMaxWavesPerEU() const { return 10; } + + /// Creates value range metadata on an workitemid.* inrinsic call or load. + bool makeLIDRangeMetadata(Instruction *I) const; + + virtual ~AMDGPUCommonSubtarget() {} +}; + +class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo, + public AMDGPUCommonSubtarget { public: enum Generation { - R600 = 0, - R700, - EVERGREEN, - NORTHERN_ISLANDS, - SOUTHERN_ISLANDS, - SEA_ISLANDS, - VOLCANIC_ISLANDS, - GFX9, + // Gap for R600 generations, so we can do comparisons between + // AMDGPUSubtarget and r600Subtarget. + SOUTHERN_ISLANDS = 4, + SEA_ISLANDS = 5, + VOLCANIC_ISLANDS = 6, + GFX9 = 7, }; enum { @@ -96,13 +255,20 @@ LLVMTrapHandlerRegValue = 1 }; +private: + SIFrameLowering FrameLowering; + + /// GlobalISel related APIs. 
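A standalone sketch of why the GCN Generation enum above now starts at SOUTHERN_ISLANDS = 4: the R600 generations keep the values 0..3 in their own enum (defined later in this header), so ordered comparisons written against a shared numeric scale keep working after the two subtarget classes are split. The helper below is illustrative only:

// Standalone model: two enums share one numeric scale so that
// "getGeneration() >= EVERGREEN"-style checks still order correctly.
#include <cassert>

namespace r600 {
enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
}
namespace gcn {
enum Generation { SOUTHERN_ISLANDS = 4, SEA_ISLANDS = 5,
                  VOLCANIC_ISLANDS = 6, GFX9 = 7 };
}

// Any GCN generation compares as newer than any R600 generation.
static bool isAtLeastEvergreen(int Gen) { return Gen >= r600::EVERGREEN; }

int main() {
  assert(isAtLeastEvergreen(gcn::SOUTHERN_ISLANDS)); // 4 >= 2
  assert(!isAtLeastEvergreen(r600::R700));           // 1 <  2
  return 0;
}
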
+ std::unique_ptr CallLoweringInfo; + std::unique_ptr InstSelector; + std::unique_ptr Legalizer; + std::unique_ptr RegBankInfo; + protected: // Basic subtarget description. Triple TargetTriple; - Generation Gen; + unsigned Gen; unsigned IsaVersion; - unsigned WavefrontSize; - int LocalMemorySize; int LDSBankCount; unsigned MaxPrivateElementSize; @@ -111,9 +277,7 @@ bool HalfRate64Ops; // Dynamially set bits that enable features. - bool FP32Denormals; bool FP64FP16Denormals; - bool FPExceptions; bool DX10Clamp; bool FlatForGlobal; bool AutoWaitcntBeforeBarrier; @@ -130,7 +294,6 @@ // Used as options. bool EnableHugePrivateBuffer; bool EnableVGPRSpilling; - bool EnablePromoteAlloca; bool EnableLoadStoreOpt; bool EnableUnsafeDSOffsetFolding; bool EnableSIScheduler; @@ -147,17 +310,13 @@ bool GFX9Insts; bool SGPRInitBug; bool HasSMemRealTime; - bool Has16BitInsts; bool HasIntClamp; - bool HasVOP3PInsts; - bool HasMadMixInsts; bool HasFmaMixInsts; bool HasMovrel; bool HasVGPRIndexMode; bool HasScalarStores; bool HasScalarAtomics; bool HasInv2PiInlineImm; - bool HasSDWA; bool HasSDWAOmod; bool HasSDWAScalar; bool HasSDWASdst; @@ -182,7 +341,6 @@ // Dummy feature to use for assembler in tablegen. bool FeatureDisable; - InstrItineraryData InstrItins; SelectionDAGTargetInfo TSInfo; AMDGPUAS AS; @@ -194,13 +352,30 @@ AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS); - const AMDGPUInstrInfo *getInstrInfo() const override = 0; - const AMDGPUFrameLowering *getFrameLowering() const override = 0; - const AMDGPUTargetLowering *getTargetLowering() const override = 0; - const AMDGPURegisterInfo *getRegisterInfo() const override = 0; + virtual const SIInstrInfo *getInstrInfo() const override = 0; - const InstrItineraryData *getInstrItineraryData() const override { - return &InstrItins; + const SIFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + + virtual const SITargetLowering *getTargetLowering() const override = 0; + + virtual const SIRegisterInfo *getRegisterInfo() const override = 0; + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); } // Nothing implemented, just prevent crashes on use. 
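The GlobalISel objects above are owned by the subtarget through std::unique_ptr, and the overridden getters hand out non-owning pointers (null until the concrete subtarget constructs them). A minimal standalone sketch of that ownership pattern; the class names below are placeholders, not the real AMDGPU types:

// Standalone sketch: unique_ptr ownership with raw-pointer accessors.
#include <cassert>
#include <memory>

struct CallLoweringModel { /* stand-in for the real CallLowering object */ };

class SubtargetModel {
  std::unique_ptr<CallLoweringModel> CallLoweringInfo;

public:
  void initGlobalISel() {
    CallLoweringInfo = std::make_unique<CallLoweringModel>();
  }
  const CallLoweringModel *getCallLowering() const {
    return CallLoweringInfo.get();   // non-owning view, may be null
  }
};

int main() {
  SubtargetModel ST;
  assert(ST.getCallLowering() == nullptr);
  ST.initGlobalISel();
  assert(ST.getCallLowering() != nullptr);
  return 0;
}
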
@@ -210,34 +385,18 @@ void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - bool isAmdHsaOS() const { - return TargetTriple.getOS() == Triple::AMDHSA; - } - bool isMesa3DOS() const { return TargetTriple.getOS() == Triple::Mesa3D; } - bool isAmdPalOS() const { - return TargetTriple.getOS() == Triple::AMDPAL; - } - Generation getGeneration() const { - return Gen; - } - - unsigned getWavefrontSize() const { - return WavefrontSize; + return (Generation)Gen; } unsigned getWavefrontSizeLog2() const { return Log2_32(WavefrontSize); } - int getLocalMemorySize() const { - return LocalMemorySize; - } - int getLDSBankCount() const { return LDSBankCount; } @@ -250,18 +409,10 @@ return AS; } - bool has16BitInsts() const { - return Has16BitInsts; - } - bool hasIntClamp() const { return HasIntClamp; } - bool hasVOP3PInsts() const { - return HasVOP3PInsts; - } - bool hasFP64() const { return FP64; } @@ -270,6 +421,10 @@ return MIMG_R128; } + bool hasHWFP64() const { + return FP64; + } + bool hasFastFMAF32() const { return FastFMAF32; } @@ -279,15 +434,15 @@ } bool hasAddr64() const { - return (getGeneration() < VOLCANIC_ISLANDS); + return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); } bool hasBFE() const { - return (getGeneration() >= EVERGREEN); + return true; } bool hasBFI() const { - return (getGeneration() >= EVERGREEN); + return true; } bool hasBFM() const { @@ -295,42 +450,23 @@ } bool hasBCNT(unsigned Size) const { - if (Size == 32) - return (getGeneration() >= EVERGREEN); - - if (Size == 64) - return (getGeneration() >= SOUTHERN_ISLANDS); - - return false; - } - - bool hasMulU24() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasMulI24() const { - return (getGeneration() >= SOUTHERN_ISLANDS || - hasCaymanISA()); + return true; } bool hasFFBL() const { - return (getGeneration() >= EVERGREEN); + return true; } bool hasFFBH() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasMed3_16() const { - return getGeneration() >= GFX9; + return true; } - bool hasMin3Max3_16() const { - return getGeneration() >= GFX9; + virtual bool hasMed3_16() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; } - bool hasMadMixInsts() const { - return HasMadMixInsts; + virtual bool hasMin3Max3_16() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; } bool hasFmaMixInsts() const { @@ -338,15 +474,15 @@ } bool hasCARRY() const { - return (getGeneration() >= EVERGREEN); + return true; } - bool hasBORROW() const { - return (getGeneration() >= EVERGREEN); + virtual bool hasBORROW() const { + return true; } - bool hasCaymanISA() const { - return CaymanISA; + virtual bool hasCaymanISA() const { + return false; } bool hasFMA() const { @@ -361,10 +497,6 @@ return EnableHugePrivateBuffer; } - bool isPromoteAllocaEnabled() const { - return EnablePromoteAlloca; - } - bool unsafeDSOffsetFoldingEnabled() const { return EnableUnsafeDSOffsetFolding; } @@ -378,20 +510,10 @@ unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const; - /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if - /// the given LDS memory size is the only constraint. 
- unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; - - unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; - bool hasFP16Denormals() const { return FP64FP16Denormals; } - bool hasFP32Denormals() const { - return FP32Denormals; - } - bool hasFP64Denormals() const { return FP64FP16Denormals; } @@ -400,10 +522,6 @@ return getGeneration() >= AMDGPUSubtarget::GFX9; } - bool hasFPExceptions() const { - return FPExceptions; - } - bool enableDX10Clamp() const { return DX10Clamp; } @@ -445,7 +563,7 @@ } bool hasApertureRegs() const { - return HasApertureRegs; + return HasApertureRegs; } bool isTrapHandlerEnabled() const { @@ -511,14 +629,6 @@ return getGeneration() >= SEA_ISLANDS; } - bool hasFminFmaxLegacy() const { - return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; - } - - bool hasSDWA() const { - return HasSDWA; - } - bool hasSDWAOmod() const { return HasSDWAOmod; } @@ -557,10 +667,6 @@ return isAmdCodeObjectV2(F) ? 0 : 36; } - unsigned getAlignmentForImplicitArgPtr() const { - return isAmdHsaOS() ? 8 : 4; - } - /// \returns Number of bytes of arguments that are passed to a shader or /// kernel in addition to the explicit ones declared for the function. unsigned getImplicitArgNumBytes(const Function &F) const { @@ -589,134 +695,39 @@ return true; } - void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;} - bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;} + void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } + bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } /// \returns Number of execution units per compute unit supported by the /// subtarget. unsigned getEUsPerCU() const { - return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits()); - } - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(), - FlatWorkGroupSize); + return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits()); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget without any kind of limitation. unsigned getMaxWavesPerCU() const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits()); + return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits()); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(), + return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize); } - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. - unsigned getMinWavesPerEU() const { - return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits()); - } - /// \returns Maximum number of waves per execution unit supported by the /// subtarget without any kind of limitation. unsigned getMaxWavesPerEU() const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits()); - } - - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. 
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(), - FlatWorkGroupSize); - } - - /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits()); - } - - /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits()); + return AMDGPU::IsaInfo::getMaxWavesPerEU(); } /// \returns Number of waves per work group supported by the subtarget and /// limited by given \p FlatWorkGroupSize. unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(), - FlatWorkGroupSize); - } - - /// \returns Default range flat work group size for a calling convention. - std::pair getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; - - /// \returns Subtarget's default pair of minimum/maximum flat work group sizes - /// for function \p F, or minimum/maximum flat work group sizes explicitly - /// requested using "amdgpu-flat-work-group-size" attribute attached to - /// function \p F. - /// - /// \returns Subtarget's default values if explicitly requested values cannot - /// be converted to integer, or violate subtarget's specifications. - std::pair getFlatWorkGroupSizes(const Function &F) const; - - /// \returns Subtarget's default pair of minimum/maximum number of waves per - /// execution unit for function \p F, or minimum/maximum number of waves per - /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute - /// attached to function \p F. - /// - /// \returns Subtarget's default values if explicitly requested values cannot - /// be converted to integer, violate subtarget's specifications, or are not - /// compatible with minimum/maximum number of waves limited by flat work group - /// size, register usage, and/or lds usage. - std::pair getWavesPerEU(const Function &F) const; - - /// Creates value range metadata on an workitemid.* inrinsic call or load. - bool makeLIDRangeMetadata(Instruction *I) const; -}; - -class R600Subtarget final : public AMDGPUSubtarget { -private: - R600InstrInfo InstrInfo; - R600FrameLowering FrameLowering; - R600TargetLowering TLInfo; - -public: - R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const TargetMachine &TM); - - const R600InstrInfo *getInstrInfo() const override { - return &InstrInfo; - } - - const R600FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - - const R600TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const R600RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - - bool hasCFAluBug() const { - return CFALUBug; - } - - bool hasVertexCache() const { - return HasVertexCache; - } - - short getTexVTXClauseSize() const { - return TexVTXClauseSize; + return AMDGPU::IsaInfo::getWavesPerWorkGroup( + MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize); } }; @@ -767,6 +778,8 @@ const SIRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } + // static wrappers + static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); // XXX - Why is this here if it isn't in the default pass set? 
bool enableEarlyIfConversion() const override { @@ -776,7 +789,7 @@ void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; - bool isVGPRSpillingEnabled(const Function& F) const; + bool isVGPRSpillingEnabled(const Function &F) const; unsigned getMaxNumUserSGPRs() const { return 16; @@ -824,7 +837,7 @@ bool debuggerSupported() const { return debuggerInsertNops() && debuggerReserveRegs() && - debuggerEmitPrologue(); + debuggerEmitPrologue(); } bool debuggerInsertNops() const { @@ -866,16 +879,18 @@ unsigned getKernArgSegmentSize(const Function &F, unsigned ExplictArgBytes) const; - /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs + /// Return the maximum number of waves per SIMD for kernels using \p SGPRs + /// SGPRs unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; - /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs + /// Return the maximum number of waves per SIMD for kernels using \p VGPRs + /// VGPRs unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; /// \returns true if the flat_scratch register should be initialized with the /// pointer to the wave's scratch memory rather than a size and offset. bool flatScratchIsPointer() const { - return getGeneration() >= GFX9; + return getGeneration() >= AMDGPUSubtarget::GFX9; } /// \returns true if the machine has merged shaders in which s0-s7 are @@ -886,35 +901,39 @@ /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { - return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits()); + return AMDGPU::IsaInfo::getSGPRAllocGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns SGPR encoding granularity supported by the subtarget. unsigned getSGPREncodingGranule() const { - return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits()); + return AMDGPU::IsaInfo::getSGPREncodingGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns Total number of SGPRs supported by the subtarget. unsigned getTotalNumSGPRs() const { - return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits()); + return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits()); } /// \returns Addressable number of SGPRs supported by the subtarget. unsigned getAddressableNumSGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits()); + return AMDGPU::IsaInfo::getAddressableNumSGPRs( + MCSubtargetInfo::getFeatureBits()); } /// \returns Minimum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMinNumSGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU); + return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU); } /// \returns Maximum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { - return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU, - Addressable); + return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU, Addressable); } /// \returns Reserved number of SGPRs for given function \p MF. @@ -932,34 +951,39 @@ /// \returns VGPR allocation granularity supported by the subtarget. 
unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits()); + return AMDGPU::IsaInfo::getVGPRAllocGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns VGPR encoding granularity supported by the subtarget. unsigned getVGPREncodingGranule() const { - return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits()); + return AMDGPU::IsaInfo::getVGPREncodingGranule( + MCSubtargetInfo::getFeatureBits()); } /// \returns Total number of VGPRs supported by the subtarget. unsigned getTotalNumVGPRs() const { - return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits()); + return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits()); } /// \returns Addressable number of VGPRs supported by the subtarget. unsigned getAddressableNumVGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits()); + return AMDGPU::IsaInfo::getAddressableNumVGPRs( + MCSubtargetInfo::getFeatureBits()); } /// \returns Minimum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMinNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU); + return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU); } /// \returns Maximum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU); + return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(), + WavesPerEU); } /// \returns Reserved number of VGPRs for given function \p MF. @@ -982,6 +1006,133 @@ const override; }; + +class R600Subtarget final : public R600GenSubtargetInfo, + public AMDGPUCommonSubtarget { +public: + enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 }; + +private: + R600InstrInfo InstrInfo; + R600FrameLowering FrameLowering; + R600TargetLowering TLInfo; + bool FMA; + bool CaymanISA; + bool CFALUBug; + bool DX10Clamp; + bool HasVertexCache; + bool R600ALUInst; + bool FP64; + short TexVTXClauseSize; + Generation Gen; + unsigned MaxPrivateElementSize; + int LDSBankCount; + InstrItineraryData InstrItins; + SelectionDAGTargetInfo TSInfo; + AMDGPUAS AS; + +public: + R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, + const TargetMachine &TM); + + const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; } + + const R600FrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + + const R600TargetLowering *getTargetLowering() const override { + return &TLInfo; + } + + const R600RegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } + + // Nothing implemented, just prevent crashes on use. 
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + Generation getGeneration() const { + return Gen; + } + + unsigned getStackAlignment() const { + return 4; + } + + R600Subtarget &initializeSubtargetDependencies(const Triple &TT, + StringRef GPU, StringRef FS); + + bool hasBFE() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBFI() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBCNT(unsigned Size) const { + if (Size == 32) + return (getGeneration() >= EVERGREEN); + + return false; + } + + bool hasBORROW() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasCARRY() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasCaymanISA() const { + return CaymanISA; + } + + bool hasFFBL() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasFFBH() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasFMA() const { return FMA; } + + bool hasMed3_16() const { return false; } + + bool hasMin3Max3_16() const { return false; } + + unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { + return 36; + } + + bool hasCFAluBug() const { return CFALUBug; } + + bool hasVertexCache() const { return HasVertexCache; } + + short getTexVTXClauseSize() const { return TexVTXClauseSize; } + + AMDGPUAS getAMDGPUAS() const { return AS; } + + bool enableMachineScheduler() const override { + return true; + } + + bool enableSubRegLiveness() const override { + return true; + } +}; + } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -23,6 +23,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/IR/MDBuilder.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include @@ -34,9 +35,39 @@ #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "AMDGPUGenSubtargetInfo.inc" +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "R600GenSubtargetInfo.inc" AMDGPUSubtarget::~AMDGPUSubtarget() = default; +R600Subtarget & +R600Subtarget::initializeSubtargetDependencies(const Triple &TT, + StringRef GPU, StringRef FS) { + SmallString<256> FullFS("+promote-alloca,+dx10-clamp,"); + FullFS += FS; + ParseSubtargetFeatures(GPU, FullFS); + + // FIXME: I don't think Evergreen has any useful support for + // denormals, but should be checked. Should we issue a warning somewhere + // if someone tries to enable these? + if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) { + FP32Denormals = false; + } + + // Set defaults if needed.
+ if (MaxPrivateElementSize == 0) + MaxPrivateElementSize = 4; + + if (LDSBankCount == 0) + LDSBankCount = 32; + + HasMulU24 = getGeneration() >= EVERGREEN; + HasMulI24 = hasCaymanISA(); + + return *this; +} + AMDGPUSubtarget & AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS) { @@ -93,26 +124,44 @@ HasMovrel = true; } + HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; + return *this; } +AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT, + const FeatureBitset &FeatureBits) : + TargetTriple(TT), + SubtargetFeatureBits(FeatureBits), + Has16BitInsts(false), + HasMadMixInsts(false), + FP32Denormals(false), + FPExceptions(false), + HasSDWA(false), + HasVOP3PInsts(false), + HasMulI24(true), + HasMulU24(true), + HasFminFmaxLegacy(true), + EnablePromoteAlloca(false), + LocalMemorySize(0), + WavefrontSize(0) + { } + AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const TargetMachine &TM) - : AMDGPUGenSubtargetInfo(TT, GPU, FS), + const TargetMachine &TM) : + AMDGPUGenSubtargetInfo(TT, GPU, FS), + AMDGPUCommonSubtarget(TT, getFeatureBits()), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), TargetTriple(TT), - Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), + Gen(SOUTHERN_ISLANDS), IsaVersion(ISAVersion0_0_0), - WavefrontSize(0), - LocalMemorySize(0), LDSBankCount(0), MaxPrivateElementSize(0), FastFMAF32(false), HalfRate64Ops(false), - FP32Denormals(false), FP64FP16Denormals(false), - FPExceptions(false), DX10Clamp(false), FlatForGlobal(false), AutoWaitcntBeforeBarrier(false), @@ -129,7 +178,6 @@ EnableHugePrivateBuffer(false), EnableVGPRSpilling(false), - EnablePromoteAlloca(false), EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), EnableSIScheduler(false), @@ -137,25 +185,18 @@ DumpCode(false), FP64(false), - FMA(false), - MIMG_R128(false), - IsGCN(false), GCN3Encoding(false), CIInsts(false), GFX9Insts(false), SGPRInitBug(false), HasSMemRealTime(false), - Has16BitInsts(false), HasIntClamp(false), - HasVOP3PInsts(false), - HasMadMixInsts(false), HasFmaMixInsts(false), HasMovrel(false), HasVGPRIndexMode(false), HasScalarStores(false), HasScalarAtomics(false), HasInv2PiInlineImm(false), - HasSDWA(false), HasSDWAOmod(false), HasSDWAScalar(false), HasSDWASdst(false), @@ -171,20 +212,14 @@ AddNoCarryInsts(false), HasUnpackedD16VMem(false), - R600ALUInst(false), - CaymanISA(false), - CFALUBug(false), - HasVertexCache(false), - TexVTXClauseSize(0), ScalarizeGlobal(false), - FeatureDisable(false), - InstrItins(getInstrItineraryForCPU(GPU)) { + FeatureDisable(false) { AS = AMDGPU::getAMDGPUAS(TT); initializeSubtargetDependencies(TT, GPU, FS); } -unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves, +unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves, const Function &F) const { if (NWaves == 1) return getLocalMemorySize(); @@ -194,7 +229,7 @@ return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves; } -unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes, +unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &F) const { unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second; unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize); @@ -207,13 +242,13 @@ } unsigned -AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const { +AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(const MachineFunction 
&MF) const { const auto *MFI = MF.getInfo(); return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction()); } std::pair -AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const { +AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const { switch (CC) { case CallingConv::AMDGPU_CS: case CallingConv::AMDGPU_KERNEL: @@ -231,7 +266,7 @@ } } -std::pair AMDGPUSubtarget::getFlatWorkGroupSizes( +std::pair AMDGPUCommonSubtarget::getFlatWorkGroupSizes( const Function &F) const { // FIXME: 1024 if function. // Default minimum/maximum flat work group sizes. @@ -261,7 +296,7 @@ return Requested; } -std::pair AMDGPUSubtarget::getWavesPerEU( +std::pair AMDGPUCommonSubtarget::getWavesPerEU( const Function &F) const { // Default minimum/maximum number of waves per execution unit. std::pair Default(1, getMaxWavesPerEU()); @@ -309,7 +344,7 @@ return Requested; } -bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { +bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const { Function *Kernel = I->getParent()->getParent(); unsigned MinSize = 0; unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second; @@ -373,10 +408,14 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : - AMDGPUSubtarget(TT, GPU, FS, TM), + R600GenSubtargetInfo(TT, GPU, FS), + AMDGPUCommonSubtarget(TT, getFeatureBits()), InstrInfo(*this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), - TLInfo(TM, *this) {} + TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)), + DX10Clamp(false), + InstrItins(getInstrItineraryForCPU(GPU)), + AS (AMDGPU::getAMDGPUAS(TT)) { } SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM) @@ -624,3 +663,17 @@ std::vector> &Mutations) const { Mutations.push_back(llvm::make_unique(&InstrInfo)); } + +const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const MachineFunction &MF) { + if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn) + return static_cast(MF.getSubtarget()); + else + return static_cast(MF.getSubtarget()); +} + +const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const TargetMachine &TM, const Function &F) { + if (TM.getTargetTriple().getArch() == Triple::amdgcn) + return static_cast(TM.getSubtarget(F)); + else + return static_cast(TM.getSubtarget(F)); +} Index: lib/Target/AMDGPU/AMDGPUTargetMachine.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -34,7 +34,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { protected: std::unique_ptr TLOF; - AMDGPUIntrinsicInfo IntrinsicInfo; AMDGPUAS AS; StringRef getGPUName(const Function &F) const; @@ -49,12 +48,8 @@ CodeGenOpt::Level OL); ~AMDGPUTargetMachine() override; - const AMDGPUSubtarget *getSubtargetImpl() const; - const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override = 0; - - const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { - return &IntrinsicInfo; - } + const TargetSubtargetInfo *getSubtargetImpl() const; + const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override = 0; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); @@ -103,6 +98,7 @@ class GCNTargetMachine final : public AMDGPUTargetMachine { private: + AMDGPUIntrinsicInfo IntrinsicInfo; mutable StringMap> SubtargetMap; public: @@ -117,6 +113,10 @@ TargetTransformInfo 
getTargetTransformInfo(const Function &F) override; + const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { + return &IntrinsicInfo; + } + bool useIPRA() const override { return true; } Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -45,17 +45,12 @@ friend BaseT; - const AMDGPUSubtarget *ST; - const AMDGPUTargetLowering *TLI; + Triple TargetTriple; public: explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), - ST(TM->getSubtargetImpl(F)), - TLI(ST->getTargetLowering()) {} - - const AMDGPUSubtarget *getST() const { return ST; } - const AMDGPUTargetLowering *getTLI() const { return TLI; } + TargetTriple(TM->getTargetTriple()) {} void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); @@ -124,7 +119,7 @@ public: explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), - ST(TM->getSubtargetImpl(F)), + ST(static_cast(TM->getSubtargetImpl(F))), TLI(ST->getTargetLowering()), CommonTTI(TM, F), IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {} @@ -212,18 +207,18 @@ friend BaseT; - const AMDGPUSubtarget *ST; + const R600Subtarget *ST; const AMDGPUTargetLowering *TLI; AMDGPUTTIImpl CommonTTI; public: explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), - ST(TM->getSubtargetImpl(F)), + ST(static_cast(TM->getSubtargetImpl(F))), TLI(ST->getTargetLowering()), CommonTTI(TM, F) {} - const AMDGPUSubtarget *getST() const { return ST; } + const R600Subtarget *getST() const { return ST; } const AMDGPUTargetLowering *getTLI() const { return TLI; } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -102,7 +102,7 @@ unsigned ThresholdPrivate = UnrollThresholdPrivate; unsigned ThresholdLocal = UnrollThresholdLocal; unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal); - AMDGPUAS ASST = ST->getAMDGPUAS(); + const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple); for (const BasicBlock *BB : L->getBlocks()) { const DataLayout &DL = BB->getModule()->getDataLayout(); unsigned LocalGEPsSeen = 0; Index: lib/Target/AMDGPU/AMDILCFGStructurizer.cpp =================================================================== --- lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -432,19 +432,19 @@ for (;; --I) { if (I == MBB.end()) continue; - if (I->getOpcode() == AMDGPU::PRED_X) { + if (I->getOpcode() == R600::PRED_X) { switch (I->getOperand(2).getImm()) { - case AMDGPU::PRED_SETE_INT: - I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT); + case R600::PRED_SETE_INT: + I->getOperand(2).setImm(R600::PRED_SETNE_INT); return; - case AMDGPU::PRED_SETNE_INT: - I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT); + case R600::PRED_SETNE_INT: + I->getOperand(2).setImm(R600::PRED_SETE_INT); return; - case AMDGPU::PRED_SETE: - I->getOperand(2).setImm(AMDGPU::PRED_SETNE); + case R600::PRED_SETE: + I->getOperand(2).setImm(R600::PRED_SETNE); return; - case AMDGPU::PRED_SETNE: - I->getOperand(2).setImm(AMDGPU::PRED_SETE); + case R600::PRED_SETNE: + 
I->getOperand(2).setImm(R600::PRED_SETE); return; default: llvm_unreachable("PRED_X Opcode invalid!"); @@ -513,10 +513,10 @@ int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { switch(OldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; + case R600::JUMP_COND: + case R600::JUMP: return R600::IF_PREDICATE_SET; + case R600::BRANCH_COND_i32: + case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32; default: llvm_unreachable("internal error"); } return -1; @@ -524,10 +524,10 @@ int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { switch(OldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; + case R600::JUMP_COND: + case R600::JUMP: return R600::IF_PREDICATE_SET; + case R600::BRANCH_COND_i32: + case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32; default: llvm_unreachable("internal error"); } return -1; @@ -535,8 +535,8 @@ int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { switch(OldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; + case R600::JUMP_COND: + case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32; default: llvm_unreachable("internal error"); } return -1; @@ -544,8 +544,8 @@ int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { switch(OldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; + case R600::JUMP_COND: + case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32; default: llvm_unreachable("internal error"); } return -1; @@ -573,9 +573,9 @@ bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { switch (MI->getOpcode()) { - case AMDGPU::JUMP_COND: - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: return true; + case R600::JUMP_COND: + case R600::BRANCH_COND_i32: + case R600::BRANCH_COND_f32: return true; default: return false; } @@ -584,8 +584,8 @@ bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { switch (MI->getOpcode()) { - case AMDGPU::JUMP: - case AMDGPU::BRANCH: + case R600::JUMP: + case R600::BRANCH: return true; default: return false; @@ -634,7 +634,7 @@ MachineBasicBlock::reverse_iterator It = MBB->rbegin(); if (It != MBB->rend()) { MachineInstr *instr = &(*It); - if (instr->getOpcode() == AMDGPU::RETURN) + if (instr->getOpcode() == R600::RETURN) return instr; } return nullptr; @@ -687,8 +687,8 @@ MachineBasicBlock::iterator E = MBB->end(); MachineBasicBlock::iterator It = Pre; while (It != E) { - if (Pre->getOpcode() == AMDGPU::CONTINUE - && It->getOpcode() == AMDGPU::ENDLOOP) + if (Pre->getOpcode() == R600::CONTINUE + && It->getOpcode() == R600::ENDLOOP) ContInstr.push_back(&*Pre); Pre = It; ++It; @@ -1303,15 +1303,15 @@ bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); - //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" - MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); + //insert R600::ENDIF to avoid special case "input landBlk == NULL" + MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF); if (LandBlkHasOtherPred) { report_fatal_error("Extra register needed to handle CFG"); unsigned CmpResReg = HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); report_fatal_error("Extra compare instruction needed to handle CFG"); - 
insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, + insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, CmpResReg, DebugLoc()); } @@ -1319,7 +1319,7 @@ // cause an assertion failure in the PostRA scheduling pass. unsigned InitReg = HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); - insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, + insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg, DebugLoc()); if (MigrateTrue) { @@ -1329,7 +1329,7 @@ // (initVal != 1). report_fatal_error("Extra register needed to handle CFG"); } - insertInstrBefore(I, AMDGPU::ELSE); + insertInstrBefore(I, R600::ELSE); if (MigrateFalse) { migrateInstruction(FalseMBB, LandBlk, I); @@ -1341,7 +1341,7 @@ if (LandBlkHasOtherPred) { // add endif - insertInstrBefore(I, AMDGPU::ENDIF); + insertInstrBefore(I, R600::ENDIF); // put initReg = 2 to other predecessors of landBlk for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), @@ -1414,7 +1414,7 @@ } if (FalseMBB) { - insertInstrBefore(I, AMDGPU::ELSE); + insertInstrBefore(I, R600::ELSE); MBB->splice(I, FalseMBB, FalseMBB->begin(), FalseMBB->end()); MBB->removeSuccessor(FalseMBB, true); @@ -1423,7 +1423,7 @@ retireBlock(FalseMBB); MLI->removeBlock(FalseMBB); } - insertInstrBefore(I, AMDGPU::ENDIF); + insertInstrBefore(I, R600::ENDIF); BranchMI->eraseFromParent(); @@ -1436,8 +1436,8 @@ LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() << " land = BB" << LandMBB->getNumber() << "\n";); - insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); - insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); + insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc()); + insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc()); DstBlk->replaceSuccessor(DstBlk, LandMBB); } @@ -1453,9 +1453,9 @@ MachineBasicBlock::iterator I = BranchMI; if (TrueBranch != LandMBB) reversePredicateSetter(I, *I->getParent()); - insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL); - insertInstrBefore(I, AMDGPU::BREAK); - insertInstrBefore(I, AMDGPU::ENDIF); + insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL); + insertInstrBefore(I, R600::BREAK); + insertInstrBefore(I, R600::ENDIF); //now branchInst can be erase safely BranchMI->eraseFromParent(); //now take care of successors, retire blocks @@ -1484,8 +1484,8 @@ getBranchZeroOpcode(OldOpcode); insertCondBranchBefore(I, BranchOpcode, DL); // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); - insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); + insertInstrEnd(ContingMBB, R600::CONTINUE, DL); + insertInstrEnd(ContingMBB, R600::ENDIF, DL); } else { int BranchOpcode = TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : @@ -1500,7 +1500,7 @@ // location we've just inserted that reference here so it should be // representative insertEnd to ensure phi-moves, if exist, go before the // continue-instr. 
- insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, + insertInstrEnd(ContingMBB, R600::CONTINUE, getLastDebugLocInBB(ContingMBB)); } } @@ -1627,7 +1627,7 @@ SmallVectorImpl &RetMBB) { MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); FuncRep->push_back(DummyExitBlk); //insert to function - insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); + insertInstrEnd(DummyExitBlk, R600::RETURN); for (SmallVectorImpl::iterator It = RetMBB.begin(), E = RetMBB.end(); It != E; ++It) { Index: lib/Target/AMDGPU/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/CMakeLists.txt +++ lib/Target/AMDGPU/CMakeLists.txt @@ -4,7 +4,6 @@ tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv) tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel) -tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer) tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info) tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic) @@ -18,6 +17,17 @@ set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td) tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel) +set(LLVM_TARGET_DEFINITIONS R600.td) +tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM R600GenCallingConv.inc -gen-callingconv) +tablegen(LLVM R600GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM R600GenDFAPacketizer.inc -gen-dfa-packetizer) +tablegen(LLVM R600GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM R600GenIntrinsics.inc -gen-tgt-intrinsic) +tablegen(LLVM R600GenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM R600GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM R600GenSubtargetInfo.inc -gen-subtarget) + add_public_tablegen_target(AMDGPUCommonTableGen) add_llvm_target(AMDGPUCodeGen Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -20,6 +20,7 @@ #include "Disassembler/AMDGPUDisassembler.h" #include "AMDGPU.h" #include "AMDGPURegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" Index: lib/Target/AMDGPU/EvergreenInstructions.td =================================================================== --- lib/Target/AMDGPU/EvergreenInstructions.td +++ lib/Target/AMDGPU/EvergreenInstructions.td @@ -14,14 +14,13 @@ //===----------------------------------------------------------------------===// def isEG : Predicate< - "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && " - "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && " + "Subtarget->getGeneration() >= R600Subtarget::EVERGREEN && " "!Subtarget->hasCaymanISA()" >; def isEGorCayman : Predicate< - "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||" - "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS" + "Subtarget->getGeneration() == R600Subtarget::EVERGREEN ||" + "Subtarget->getGeneration() == R600Subtarget::NORTHERN_ISLANDS" >; class EGPat : AMDGPUPat { Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -216,13 +216,16 @@ raw_ostream &O); }; -// FIXME: R600 specific parts of AMDGPUInstrPrinter should be 
moved here, and -// MCTargetDesc should be using R600InstPrinter for the R600 target. -class R600InstPrinter : public AMDGPUInstPrinter { +class R600InstPrinter : public MCInstPrinter { public: R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) - : AMDGPUInstPrinter(MAI, MII, MRI) {} + : MCInstPrinter(MAI, MII, MRI) {} + + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O); Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -505,11 +505,6 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) { - static_cast(this)->printOperand(MI, OpNo, O); - return; - } - if (OpNo >= MI->getNumOperands()) { O << "/*Missing OP" << OpNo << "*/"; return; @@ -960,11 +955,6 @@ void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) { - static_cast(this)->printMemOperand(MI, OpNo, O); - return; - } - printOperand(MI, OpNo, STI, O); O << ", "; printOperand(MI, OpNo + 1, STI, O); @@ -990,16 +980,6 @@ O << Asm; } -void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printAbs(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printClamp(MI, OpNo, O); -} - void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -1026,70 +1006,6 @@ O << " div:2"; } -void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - static_cast(this)->printLiteral(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printLast(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printNeg(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printOMOD(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printRel(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - static_cast(this)->printUpdateExecMask(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - static_cast(this)->printUpdatePred(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printWrite(MI, OpNo, O); -} - -void 
AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - static_cast(this)->printBankSwizzle(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printRSel(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printCT(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - static_cast(this)->printKCache(MI, OpNo, O); -} - void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -1294,6 +1210,13 @@ #include "AMDGPUGenAsmWriter.inc" +void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot, const MCSubtargetInfo &STI) { + O.flush(); + printInstruction(MI, O); + printAnnotation(O, Annot); +} + void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O) { AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|'); @@ -1412,7 +1335,7 @@ if (Op.isReg()) { switch (Op.getReg()) { // This is the default predicate state, so we don't need to print it. - case AMDGPU::PRED_SEL_OFF: + case R600::PRED_SEL_OFF: break; default: @@ -1488,3 +1411,5 @@ O << " (MASKED)"; } } + +#include "R600GenAsmWriter.inc" Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -40,6 +40,7 @@ MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); +MCInstrInfo *createR600MCInstrInfo(); MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, @@ -59,6 +60,10 @@ #include "AMDGPUGenRegisterInfo.inc" #undef GET_REGINFO_ENUM +#define GET_REGINFO_ENUM +#include "R600GenRegisterInfo.inc" +#undef GET_REGINFO_ENUM + #define GET_INSTRINFO_ENUM #define GET_INSTRINFO_OPERAND_ENUM #define GET_INSTRINFO_SCHED_ENUM @@ -67,9 +72,20 @@ #undef GET_INSTRINFO_OPERAND_ENUM #undef GET_INSTRINFO_ENUM +#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_OPERAND_ENUM +#define GET_INSTRINFO_SCHED_ENUM +#include "R600GenInstrInfo.inc" +#undef GET_INSTRINFO_SCHED_ENUM +#undef GET_INSTRINFO_OPERAND_ENUM +#undef GET_INSTRINFO_ENUM #define GET_SUBTARGETINFO_ENUM #include "AMDGPUGenSubtargetInfo.inc" #undef GET_SUBTARGETINFO_ENUM +#define GET_SUBTARGETINFO_ENUM +#include "R600GenSubtargetInfo.inc" +#undef GET_SUBTARGETINFO_ENUM + #endif Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -38,9 +38,17 @@ #define GET_SUBTARGETINFO_MC_DESC #include "AMDGPUGenSubtargetInfo.inc" +#define NoSchedModel NoSchedModelR600 +#define GET_SUBTARGETINFO_MC_DESC +#include "R600GenSubtargetInfo.inc" +#undef NoSchedModelR600 + #define GET_REGINFO_MC_DESC #include "AMDGPUGenRegisterInfo.inc" +#define GET_REGINFO_MC_DESC +#include "R600GenRegisterInfo.inc" + static MCInstrInfo *createAMDGPUMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitAMDGPUMCInstrInfo(X); @@ -49,12 +57,17 @@ static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new 
MCRegisterInfo(); - InitAMDGPUMCRegisterInfo(X, 0); + if (TT.getArch() == Triple::r600) + InitR600MCRegisterInfo(X, 0); + else + InitAMDGPUMCRegisterInfo(X, 0); return X; } static MCSubtargetInfo * createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { + if (TT.getArch() == Triple::r600) + return createR600MCSubtargetInfoImpl(TT, CPU, FS); return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS); } @@ -63,8 +76,10 @@ const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) { - return T.getArch() == Triple::r600 ? new R600InstPrinter(MAI, MII, MRI) : - new AMDGPUInstPrinter(MAI, MII, MRI); + if (T.getArch() == Triple::r600) + return new R600InstPrinter(MAI, MII, MRI); + else + return new AMDGPUInstPrinter(MAI, MII, MRI); } static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S, @@ -90,10 +105,12 @@ } extern "C" void LLVMInitializeAMDGPUTargetMC() { + + TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(getTheAMDGPUTarget(), createR600MCInstrInfo); for (Target *T : {&getTheAMDGPUTarget(), &getTheGCNTarget()}) { RegisterMCAsmInfo X(*T); - TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo); TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo); TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo); TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter); Index: lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -8,5 +8,6 @@ AMDGPUMCTargetDesc.cpp AMDGPUTargetStreamer.cpp R600MCCodeEmitter.cpp + R600MCTargetDesc.cpp SIMCCodeEmitter.cpp ) Index: lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -15,7 +15,6 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/AMDGPUFixupKinds.h" -#include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" #include "llvm/MC/MCCodeEmitter.h" @@ -36,30 +35,40 @@ namespace { -class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { +class R600MCCodeEmitter : public MCCodeEmitter { const MCRegisterInfo &MRI; + const MCInstrInfo &MCII; public: R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri) - : AMDGPUMCCodeEmitter(mcii), MRI(mri) {} + : MRI(mri), MCII(mcii) {} R600MCCodeEmitter(const R600MCCodeEmitter &) = delete; R600MCCodeEmitter &operator=(const R600MCCodeEmitter &) = delete; /// Encode the instruction and write it to the OS. void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const override; + const MCSubtargetInfo &STI) const; /// \returns the encoding for an MCOperand. 
uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const override; + const MCSubtargetInfo &STI) const; private: + void Emit(uint32_t value, raw_ostream &OS) const; void Emit(uint64_t value, raw_ostream &OS) const; unsigned getHWReg(unsigned regNo) const; + + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; + void verifyInstructionPredicates(const MCInst &MI, + uint64_t AvailableFeatures) const; + }; } // end anonymous namespace @@ -94,16 +103,16 @@ computeAvailableFeatures(STI.getFeatureBits())); const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); - if (MI.getOpcode() == AMDGPU::RETURN || - MI.getOpcode() == AMDGPU::FETCH_CLAUSE || - MI.getOpcode() == AMDGPU::ALU_CLAUSE || - MI.getOpcode() == AMDGPU::BUNDLE || - MI.getOpcode() == AMDGPU::KILL) { + if (MI.getOpcode() == R600::RETURN || + MI.getOpcode() == R600::FETCH_CLAUSE || + MI.getOpcode() == R600::ALU_CLAUSE || + MI.getOpcode() == R600::BUNDLE || + MI.getOpcode() == R600::KILL) { return; } else if (IS_VTX(Desc)) { uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI); uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset - if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) { + if (!(STI.getFeatureBits()[R600::FeatureCaymanISA])) { InstWord2 |= 1 << 19; // Mega-Fetch bit } @@ -136,7 +145,7 @@ Emit((uint32_t) 0, OS); } else { uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI); - if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) && + if ((STI.getFeatureBits()[R600::FeatureR600ALUInst]) && ((Desc.TSFlags & R600_InstFlag::OP1) || Desc.TSFlags & R600_InstFlag::OP2)) { uint64_t ISAOpCode = Inst & (0x3FFULL << 39); @@ -186,4 +195,4 @@ } #define ENABLE_INSTR_PREDICATE_VERIFIER -#include "AMDGPUGenMCCodeEmitter.inc" +#include "R600GenMCCodeEmitter.inc" Index: lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp =================================================================== --- /dev/null +++ lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp @@ -0,0 +1,27 @@ +//===-- R600MCTargetDesc.cpp - R600 Target Descriptions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This file provides R600 specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUMCTargetDesc.h" +#include "llvm/MC/MCInstrInfo.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "R600GenInstrInfo.inc" + +MCInstrInfo *llvm::createR600MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitR600MCInstrInfo(X); + return X; +} Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -438,3 +438,6 @@ llvm_unreachable("Encoding of this operand type is not supported yet."); return 0; } + +#define ENABLE_INSTR_PREDICATE_VERIFIER +#include "AMDGPUGenMCCodeEmitter.inc" Index: lib/Target/AMDGPU/R600.td =================================================================== --- /dev/null +++ lib/Target/AMDGPU/R600.td @@ -0,0 +1,51 @@ + +include "llvm/Target/Target.td" + +def R600InstrInfo : InstrInfo { + let guessInstructionProperties = 1; + let noNamedPositionallyEncodedOperands = 1; +} + +def R600 : Target { + let InstructionSet = R600InstrInfo; + let AllowRegisterRenaming = 1; +} + +let Namespace = "R600" in { + +foreach Index = 0-15 in { + def sub#Index : SubRegIndex<32, !shl(Index, 5)>; +} + +include "R600RegisterInfo.td" + +} + +def NullALU : InstrItinClass; +def ALU_NULL : FuncUnit; + +include "AMDGPUFeatures.td" +include "R600Schedule.td" +include "R600Processors.td" +include "AMDGPUInstrInfo.td" +include "AMDGPUInstructions.td" +include "R600Instructions.td" +include "R700Instructions.td" +include "EvergreenInstructions.td" +include "CaymanInstructions.td" + +// Calling convention for R600 +def CC_R600 : CallingConv<[ + CCIfInReg>> +]>; + +// Calling convention for compute kernels +def CC_R600_Kernel : CallingConv<[ + CCCustom<"allocateKernArg"> +]>; Index: lib/Target/AMDGPU/R600AsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/R600AsmPrinter.cpp +++ lib/Target/AMDGPU/R600AsmPrinter.cpp @@ -51,7 +51,7 @@ for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { - if (MI.getOpcode() == AMDGPU::KILLGT) + if (MI.getOpcode() == R600::KILLGT) killPixel = true; unsigned numOperands = MI.getNumOperands(); for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { Index: lib/Target/AMDGPU/R600ClauseMergePass.cpp =================================================================== --- lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -34,8 +34,8 @@ static bool isCFAlu(const MachineInstr &MI) { switch (MI.getOpcode()) { - case AMDGPU::CF_ALU: - case AMDGPU::CF_ALU_PUSH_BEFORE: + case R600::CF_ALU: + case R600::CF_ALU_PUSH_BEFORE: return true; default: return false; @@ -85,20 +85,20 @@ unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const { assert(isCFAlu(MI)); return MI - .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT)) + .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT)) .getImm(); } bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const { assert(isCFAlu(MI)); return MI - .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled)) + .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled)) .getImm(); } void R600ClauseMergePass::cleanPotentialDisabledCFAlu( MachineInstr &CFAlu) const { - int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); + int 
CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT); MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end(); I++; do { @@ -117,7 +117,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu, const MachineInstr &LatrCFAlu) const { assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); - int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); + int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT); unsigned RootInstCount = getCFAluSize(RootCFAlu), LaterInstCount = getCFAluSize(LatrCFAlu); unsigned CumuledInsts = RootInstCount + LaterInstCount; @@ -125,15 +125,15 @@ LLVM_DEBUG(dbgs() << "Excess inst counts\n"); return false; } - if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) + if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE) return false; // Is KCache Bank 0 compatible ? int Mode0Idx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0); int KBank0Idx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0); int KBank0LineIdx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0); if (LatrCFAlu.getOperand(Mode0Idx).getImm() && RootCFAlu.getOperand(Mode0Idx).getImm() && (LatrCFAlu.getOperand(KBank0Idx).getImm() != @@ -145,11 +145,11 @@ } // Is KCache Bank 1 compatible ? int Mode1Idx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1); int KBank1Idx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1); int KBank1LineIdx = - TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); + TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1); if (LatrCFAlu.getOperand(Mode1Idx).getImm() && RootCFAlu.getOperand(Mode1Idx).getImm() && (LatrCFAlu.getOperand(KBank1Idx).getImm() != Index: lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp =================================================================== --- lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -94,7 +94,7 @@ } bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { - if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && + if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && getLoopDepth() > 1) return true; @@ -103,10 +103,10 @@ switch(Opcode) { default: return false; - case AMDGPU::CF_ALU_PUSH_BEFORE: - case AMDGPU::CF_ALU_ELSE_AFTER: - case AMDGPU::CF_ALU_BREAK: - case AMDGPU::CF_ALU_CONTINUE: + case R600::CF_ALU_PUSH_BEFORE: + case R600::CF_ALU_ELSE_AFTER: + case R600::CF_ALU_BREAK: + case R600::CF_ALU_CONTINUE: if (CurrentSubEntries == 0) return false; if (ST->getWavefrontSize() == 64) { @@ -168,8 +168,8 @@ void CFStack::pushBranch(unsigned Opcode, bool isWQM) { CFStack::StackItem Item = CFStack::ENTRY; switch(Opcode) { - case AMDGPU::CF_PUSH_EG: - case AMDGPU::CF_ALU_PUSH_BEFORE: + case R600::CF_PUSH_EG: + case R600::CF_ALU_PUSH_BEFORE: if (!isWQM) { if (!ST->hasCaymanISA() && !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) @@ -240,8 +240,8 @@ bool IsTrivialInst(MachineInstr &MI) const { switch (MI.getOpcode()) { - case AMDGPU::KILL: - case AMDGPU::RETURN: + case R600::KILL: + case R600::RETURN: return true; default: return false; @@ -253,41 +253,41 @@ bool isEg = (ST->getGeneration() 
>= R600Subtarget::EVERGREEN); switch (CFI) { case CF_TC: - Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; + Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; break; case CF_VC: - Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600; + Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600; break; case CF_CALL_FS: - Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600; + Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600; break; case CF_WHILE_LOOP: - Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600; + Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600; break; case CF_END_LOOP: - Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600; + Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600; break; case CF_LOOP_BREAK: - Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600; + Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600; break; case CF_LOOP_CONTINUE: - Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600; + Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600; break; case CF_JUMP: - Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600; + Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600; break; case CF_ELSE: - Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600; + Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600; break; case CF_POP: - Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600; + Opcode = isEg ? R600::POP_EG : R600::POP_R600; break; case CF_END: if (ST->hasCaymanISA()) { - Opcode = AMDGPU::CF_END_CM; + Opcode = R600::CF_END_CM; break; } - Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600; + Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600; break; } assert (Opcode && "No opcode selected"); @@ -305,21 +305,21 @@ continue; if (MO.isDef()) { unsigned Reg = MO.getReg(); - if (AMDGPU::R600_Reg128RegClass.contains(Reg)) + if (R600::R600_Reg128RegClass.contains(Reg)) DstMI = Reg; else DstMI = TRI->getMatchingSuperReg(Reg, AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), - &AMDGPU::R600_Reg128RegClass); + &R600::R600_Reg128RegClass); } if (MO.isUse()) { unsigned Reg = MO.getReg(); - if (AMDGPU::R600_Reg128RegClass.contains(Reg)) + if (R600::R600_Reg128RegClass.contains(Reg)) SrcMI = Reg; else SrcMI = TRI->getMatchingSuperReg(Reg, AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), - &AMDGPU::R600_Reg128RegClass); + &R600::R600_Reg128RegClass); } } if ((DstRegs.find(SrcMI) == DstRegs.end())) { @@ -359,15 +359,15 @@ void getLiteral(MachineInstr &MI, std::vector &Lits) const { static const unsigned LiteralRegs[] = { - AMDGPU::ALU_LITERAL_X, - AMDGPU::ALU_LITERAL_Y, - AMDGPU::ALU_LITERAL_Z, - AMDGPU::ALU_LITERAL_W + R600::ALU_LITERAL_X, + R600::ALU_LITERAL_Y, + R600::ALU_LITERAL_Z, + R600::ALU_LITERAL_W }; const SmallVector, 3> Srcs = TII->getSrcs(MI); for (const auto &Src:Srcs) { - if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X) + if (Src.first->getReg() != R600::ALU_LITERAL_X) continue; int64_t Imm = Src.second; std::vector::iterator It = @@ -377,7 +377,7 @@ // Get corresponding Operand MachineOperand &Operand = MI.getOperand( - TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal)); + TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal)); if (It != Lits.end()) { // Reuse existing literal reg @@ -400,7 +400,7 @@ unsigned LiteralPair0 = Literals[i]; unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), - TII->get(AMDGPU::LITERALS)) + TII->get(R600::LITERALS)) 
.addImm(LiteralPair0) .addImm(LiteralPair1); } @@ -442,7 +442,7 @@ } for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), - TII->get(AMDGPU::LITERALS)); + TII->get(R600::LITERALS)); if (Literals[i]->isImm()) { MILit.addImm(Literals[i]->getImm()); } else { @@ -471,7 +471,7 @@ unsigned &CfCount) { CounterPropagateAddr(*Clause.first, CfCount); MachineBasicBlock *BB = Clause.first->getParent(); - BuildMI(BB, DL, TII->get(AMDGPU::FETCH_CLAUSE)).addImm(CfCount); + BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount); for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { BB->splice(InsertPos, BB, Clause.second[i]); } @@ -483,7 +483,7 @@ Clause.first->getOperand(0).setImm(0); CounterPropagateAddr(*Clause.first, CfCount); MachineBasicBlock *BB = Clause.first->getParent(); - BuildMI(BB, DL, TII->get(AMDGPU::ALU_CLAUSE)).addImm(CfCount); + BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount); for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { BB->splice(InsertPos, BB, Clause.second[i]); } @@ -540,34 +540,34 @@ } MachineBasicBlock::iterator MI = I; - if (MI->getOpcode() != AMDGPU::ENDIF) + if (MI->getOpcode() != R600::ENDIF) LastAlu.back() = nullptr; - if (MI->getOpcode() == AMDGPU::CF_ALU) + if (MI->getOpcode() == R600::CF_ALU) LastAlu.back() = &*MI; I++; bool RequiresWorkAround = CFStack.requiresWorkAroundForInst(MI->getOpcode()); switch (MI->getOpcode()) { - case AMDGPU::CF_ALU_PUSH_BEFORE: + case R600::CF_ALU_PUSH_BEFORE: if (RequiresWorkAround) { LLVM_DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n"); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG)) .addImm(CfCount + 1) .addImm(1); - MI->setDesc(TII->get(AMDGPU::CF_ALU)); + MI->setDesc(TII->get(R600::CF_ALU)); CfCount++; - CFStack.pushBranch(AMDGPU::CF_PUSH_EG); + CFStack.pushBranch(R600::CF_PUSH_EG); } else - CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE); + CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE); LLVM_FALLTHROUGH; - case AMDGPU::CF_ALU: + case R600::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; - case AMDGPU::WHILELOOP: { + case R600::WHILELOOP: { CFStack.pushLoop(); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_WHILE_LOOP)) @@ -580,7 +580,7 @@ CfCount++; break; } - case AMDGPU::ENDLOOP: { + case R600::ENDLOOP: { CFStack.popLoop(); std::pair> Pair = std::move(LoopStack.back()); @@ -592,7 +592,7 @@ CfCount++; break; } - case AMDGPU::IF_PREDICATE_SET: { + case R600::IF_PREDICATE_SET: { LastAlu.push_back(nullptr); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) @@ -604,7 +604,7 @@ CfCount++; break; } - case AMDGPU::ELSE: { + case R600::ELSE: { MachineInstr * JumpInst = IfThenElseStack.back(); IfThenElseStack.pop_back(); CounterPropagateAddr(*JumpInst, CfCount); @@ -618,7 +618,7 @@ CfCount++; break; } - case AMDGPU::ENDIF: { + case R600::ENDIF: { CFStack.popBranch(); if (LastAlu.back()) { ToPopAfter.push_back(LastAlu.back()); @@ -640,7 +640,7 @@ MI->eraseFromParent(); break; } - case AMDGPU::BREAK: { + case R600::BREAK: { CfCount ++; MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_LOOP_BREAK)) @@ -649,7 +649,7 @@ MI->eraseFromParent(); break; } - case AMDGPU::CONTINUE: { + case R600::CONTINUE: { MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 
getHWInstrDesc(CF_LOOP_CONTINUE)) .addImm(0); @@ -658,12 +658,12 @@ CfCount++; break; } - case AMDGPU::RETURN: { + case R600::RETURN: { DebugLoc DL = MBB.findDebugLoc(MI); BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END)); CfCount++; if (CfCount % 2) { - BuildMI(MBB, I, DL, TII->get(AMDGPU::PAD)); + BuildMI(MBB, I, DL, TII->get(R600::PAD)); CfCount++; } MI->eraseFromParent(); @@ -684,7 +684,7 @@ for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) { MachineInstr *Alu = ToPopAfter[i]; BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), - TII->get(AMDGPU::CF_ALU_POP_AFTER)) + TII->get(R600::CF_ALU_POP_AFTER)) .addImm(Alu->getOperand(0).getImm()) .addImm(Alu->getOperand(1).getImm()) .addImm(Alu->getOperand(2).getImm()) Index: lib/Target/AMDGPU/R600EmitClauseMarkers.cpp =================================================================== --- lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -52,12 +52,12 @@ unsigned OccupiedDwords(MachineInstr &MI) const { switch (MI.getOpcode()) { - case AMDGPU::INTERP_PAIR_XY: - case AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::DOT_4: + case R600::INTERP_PAIR_XY: + case R600::INTERP_PAIR_ZW: + case R600::INTERP_VEC_LOAD: + case R600::DOT_4: return 4; - case AMDGPU::KILL: + case R600::KILL: return 0; default: break; @@ -77,7 +77,7 @@ E = MI.operands_end(); It != E; ++It) { MachineOperand &MO = *It; - if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) + if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X) ++NumLiteral; } return 1 + NumLiteral; @@ -89,12 +89,12 @@ if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode())) return true; switch (MI.getOpcode()) { - case AMDGPU::PRED_X: - case AMDGPU::INTERP_PAIR_XY: - case AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::COPY: - case AMDGPU::DOT_4: + case R600::PRED_X: + case R600::INTERP_PAIR_XY: + case R600::INTERP_PAIR_ZW: + case R600::INTERP_VEC_LOAD: + case R600::COPY: + case R600::DOT_4: return true; default: return false; @@ -103,9 +103,9 @@ bool IsTrivialInst(MachineInstr &MI) const { switch (MI.getOpcode()) { - case AMDGPU::KILL: - case AMDGPU::RETURN: - case AMDGPU::IMPLICIT_DEF: + case R600::KILL: + case R600::RETURN: + case R600::IMPLICIT_DEF: return true; default: return false; @@ -132,16 +132,16 @@ bool UpdateInstr = true) const { std::vector> UsedKCache; - if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4) + if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4) return true; const SmallVectorImpl> &Consts = TII->getSrcs(MI); assert( - (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == AMDGPU::DOT_4) && + (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) && "Can't assign Const"); for (unsigned i = 0, n = Consts.size(); i < n; ++i) { - if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) + if (Consts[i].first->getReg() != R600::ALU_CONST) continue; unsigned Sel = Consts[i].second; unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; @@ -172,16 +172,16 @@ return true; for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { - if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) + if (Consts[i].first->getReg() != R600::ALU_CONST) continue; switch(UsedKCache[j].first) { case 0: Consts[i].first->setReg( - AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second)); + R600::R600_KC0RegClass.getRegister(UsedKCache[j].second)); break; case 1: Consts[i].first->setReg( - AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second)); + 
R600::R600_KC1RegClass.getRegister(UsedKCache[j].second)); break; default: llvm_unreachable("Wrong Cache Line"); @@ -253,7 +253,7 @@ break; if (AluInstCount > TII->getMaxAlusPerClause()) break; - if (I->getOpcode() == AMDGPU::PRED_X) { + if (I->getOpcode() == R600::PRED_X) { // We put PRED_X in its own clause to ensure that ifcvt won't create // clauses with more than 128 insts. // IfCvt is indeed checking that "then" and "else" branches of an if @@ -289,7 +289,7 @@ AluInstCount += OccupiedDwords(*I); } unsigned Opcode = PushBeforeModifier ? - AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; + R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU; BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) // We don't use the ADDR field until R600ControlFlowFinalizer pass, where // it is safe to assume it is 0. However if we always put 0 here, the ifcvt @@ -322,7 +322,7 @@ BB != BB_E; ++BB) { MachineBasicBlock &MBB = *BB; MachineBasicBlock::iterator I = MBB.begin(); - if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU) + if (I != MBB.end() && I->getOpcode() == R600::CF_ALU) continue; // BB was already parsed for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { if (isALU(*I)) { Index: lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp =================================================================== --- lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -96,16 +96,16 @@ // Expand LDS_*_RET instructions if (TII->isLDSRetInstr(MI.getOpcode())) { - int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); + int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst); assert(DstIdx != -1); MachineOperand &DstOp = MI.getOperand(DstIdx); MachineInstr *Mov = TII->buildMovInstr(&MBB, I, - DstOp.getReg(), AMDGPU::OQAP); - DstOp.setReg(AMDGPU::OQAP); + DstOp.getReg(), R600::OQAP); + DstOp.setReg(R600::OQAP); int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(), - AMDGPU::OpName::pred_sel); + R600::OpName::pred_sel); int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(), - AMDGPU::OpName::pred_sel); + R600::OpName::pred_sel); // Copy the pred_sel bit Mov->getOperand(MovPredSelIdx).setReg( MI.getOperand(LDSPredSelIdx).getReg()); @@ -114,7 +114,7 @@ switch (MI.getOpcode()) { default: break; // Expand PRED_X to one of the PRED_SET instructions. - case AMDGPU::PRED_X: { + case R600::PRED_X: { uint64_t Flags = MI.getOperand(3).getImm(); // The native opcode used by PRED_X is stored as an immediate in the // third operand. 
@@ -122,17 +122,18 @@ MI.getOperand(2).getImm(), // opcode MI.getOperand(0).getReg(), // dst MI.getOperand(1).getReg(), // src0 - AMDGPU::ZERO); // src1 + R600::ZERO); // src1 TII->addFlag(*PredSet, 0, MO_FLAG_MASK); if (Flags & MO_FLAG_PUSH) { - TII->setImmOperand(*PredSet, AMDGPU::OpName::update_exec_mask, 1); + TII->setImmOperand(*PredSet, R600::OpName::update_exec_mask, 1); } else { - TII->setImmOperand(*PredSet, AMDGPU::OpName::update_pred, 1); + TII->setImmOperand(*PredSet, R600::OpName::update_pred, 1); } MI.eraseFromParent(); continue; } - case AMDGPU::DOT_4: { + case R600::DOT_4: { + const R600RegisterInfo &TRI = TII->getRegisterInfo(); unsigned DstReg = MI.getOperand(0).getReg(); @@ -141,7 +142,7 @@ for (unsigned Chan = 0; Chan < 4; ++Chan) { bool Mask = (Chan != TRI.getHWRegChan(DstReg)); unsigned SubDstReg = - AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); + R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); MachineInstr *BMI = TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg); if (Chan > 0) { @@ -156,10 +157,10 @@ // While not strictly necessary from hw point of view, we force // all src operands of a dot4 inst to belong to the same slot. unsigned Src0 = BMI->getOperand( - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0)) + TII->getOperandIdx(Opcode, R600::OpName::src0)) .getReg(); unsigned Src1 = BMI->getOperand( - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1)) + TII->getOperandIdx(Opcode, R600::OpName::src1)) .getReg(); (void) Src0; (void) Src1; @@ -206,14 +207,14 @@ // T0_W = CUBE T1_Y, T1_Z for (unsigned Chan = 0; Chan < 4; Chan++) { unsigned DstReg = MI.getOperand( - TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg(); + TII->getOperandIdx(MI, R600::OpName::dst)).getReg(); unsigned Src0 = MI.getOperand( - TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg(); + TII->getOperandIdx(MI, R600::OpName::src0)).getReg(); unsigned Src1 = 0; // Determine the correct source registers if (!IsCube) { - int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1); + int Src1Idx = TII->getOperandIdx(MI, R600::OpName::src1); if (Src1Idx != -1) { Src1 = MI.getOperand(Src1Idx).getReg(); } @@ -241,7 +242,7 @@ // the current Channel. 
Mask = (Chan != TRI.getHWRegChan(DstReg)); unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; - DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); + DstReg = R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); } // Set the IsLast bit @@ -250,11 +251,11 @@ // Add the new instruction unsigned Opcode = MI.getOpcode(); switch (Opcode) { - case AMDGPU::CUBE_r600_pseudo: - Opcode = AMDGPU::CUBE_r600_real; + case R600::CUBE_r600_pseudo: + Opcode = R600::CUBE_r600_real; break; - case AMDGPU::CUBE_eg_pseudo: - Opcode = AMDGPU::CUBE_eg_real; + case R600::CUBE_eg_pseudo: + Opcode = R600::CUBE_eg_real; break; default: break; @@ -271,12 +272,12 @@ if (NotLast) { TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST); } - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp); - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal); - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs); - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs); - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg); - SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg); + SetFlagInNewMI(NewMI, &MI, R600::OpName::clamp); + SetFlagInNewMI(NewMI, &MI, R600::OpName::literal); + SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_abs); + SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_abs); + SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_neg); + SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_neg); } MI.eraseFromParent(); } Index: lib/Target/AMDGPU/R600ISelLowering.h =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.h +++ lib/Target/AMDGPU/R600ISelLowering.h @@ -23,6 +23,8 @@ class R600Subtarget; class R600TargetLowering final : public AMDGPUTargetLowering { + + const R600Subtarget *Subtarget; public: R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI); @@ -36,6 +38,7 @@ void ReplaceNodeResults(SDNode * N, SmallVectorImpl &Results, SelectionDAG &DAG) const override; + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -14,7 +14,6 @@ #include "R600ISelLowering.h" #include "AMDGPUFrameLowering.h" -#include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600FrameLowering.h" @@ -51,17 +50,31 @@ using namespace llvm; +static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + MachineFunction &MF = State.getMachineFunction(); + AMDGPUMachineFunction *MFI = MF.getInfo(); + + uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(), + ArgFlags.getOrigAlign()); + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; +} + +#include "R600GenCallingConv.inc" + R600TargetLowering::R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI) - : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) { - addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); - addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); - addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass); - addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass); - addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); - addRegisterClass(MVT::v4i32, 
&AMDGPU::R600_Reg128RegClass); + : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) { + addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass); + addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass); + addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass); + addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass); + addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass); + addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass); - computeRegisterProperties(STI.getRegisterInfo()); + computeRegisterProperties(Subtarget->getRegisterInfo()); // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, MVT::i32, Custom); @@ -148,6 +161,11 @@ setOperationAction(ISD::FSUB, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Custom); + setOperationAction(ISD::FTRUNC, MVT::f64, Custom); + setOperationAction(ISD::FRINT, MVT::f64, Custom); + setOperationAction(ISD::FFLOOR, MVT::f64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); @@ -216,6 +234,34 @@ setOperationAction(ISD::FMA, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); } + + // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we + // need it for R600. + if (!Subtarget->hasFP32Denormals()) + setOperationAction(ISD::FMAD, MVT::f32, Legal); + + if (!Subtarget->hasBFI()) { + // fcopysign can be done in a single instruction with BFI. + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + } + + if (!Subtarget->hasBCNT(32)) + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + + if (!Subtarget->hasBCNT(64)) + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + + if (Subtarget->hasFFBH()) + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); + + if (Subtarget->hasFFBL()) + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); + + // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we + // need it for R600. + if (Subtarget->hasBFE()) + setHasExtractBitsInsn(true); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); @@ -246,14 +292,10 @@ setTargetDAGCombine(ISD::LOAD); } -const R600Subtarget *R600TargetLowering::getSubtarget() const { - return static_cast(Subtarget); -} - static inline bool isEOP(MachineBasicBlock::iterator I) { if (std::next(I) == I->getParent()->end()) return false; - return std::next(I)->getOpcode() == AMDGPU::RETURN; + return std::next(I)->getOpcode() == R600::RETURN; } MachineBasicBlock * @@ -262,24 +304,24 @@ MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineBasicBlock::iterator I = MI; - const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); + const R600InstrInfo *TII = Subtarget->getInstrInfo(); switch (MI.getOpcode()) { default: // Replace LDS_*_RET instruction that don't have any uses with the // equivalent LDS_*_NORET instruction. if (TII->isLDSRetInstr(MI.getOpcode())) { - int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); + int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst); assert(DstIdx != -1); MachineInstrBuilder NewMI; // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add // LDS_1A2D support and remove this special case. 
if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) || - MI.getOpcode() == AMDGPU::LDS_CMPST_RET) + MI.getOpcode() == R600::LDS_CMPST_RET) return BB; NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), - TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode()))); + TII->get(R600::getLDSNoRetOp(MI.getOpcode()))); for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) { NewMI.add(MI.getOperand(i)); } @@ -288,23 +330,23 @@ } break; - case AMDGPU::FABS_R600: { + case R600::FABS_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction( - *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(), + *BB, I, R600::MOV, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); TII->addFlag(*NewMI, 0, MO_FLAG_ABS); break; } - case AMDGPU::FNEG_R600: { + case R600::FNEG_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction( - *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(), + *BB, I, R600::MOV, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); TII->addFlag(*NewMI, 0, MO_FLAG_NEG); break; } - case AMDGPU::MASK_WRITE: { + case R600::MASK_WRITE: { unsigned maskedRegister = MI.getOperand(0).getReg(); assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); @@ -312,7 +354,7 @@ break; } - case AMDGPU::MOV_IMM_F32: + case R600::MOV_IMM_F32: TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1) .getFPImm() ->getValueAPF() @@ -320,39 +362,39 @@ .getZExtValue()); break; - case AMDGPU::MOV_IMM_I32: + case R600::MOV_IMM_I32: TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1).getImm()); break; - case AMDGPU::MOV_IMM_GLOBAL_ADDR: { + case R600::MOV_IMM_GLOBAL_ADDR: { //TODO: Perhaps combine this instruction with the next if possible auto MIB = TII->buildDefaultInstruction( - *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X); - int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal); + *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X); + int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal); //TODO: Ugh this is rather ugly MIB->getOperand(Idx) = MI.getOperand(1); break; } - case AMDGPU::CONST_COPY: { + case R600::CONST_COPY: { MachineInstr *NewMI = TII->buildDefaultInstruction( - *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); - TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel, + *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST); + TII->setImmOperand(*NewMI, R600::OpName::src0_sel, MI.getOperand(1).getImm()); break; } - case AMDGPU::RAT_WRITE_CACHELESS_32_eg: - case AMDGPU::RAT_WRITE_CACHELESS_64_eg: - case AMDGPU::RAT_WRITE_CACHELESS_128_eg: + case R600::RAT_WRITE_CACHELESS_32_eg: + case R600::RAT_WRITE_CACHELESS_64_eg: + case R600::RAT_WRITE_CACHELESS_128_eg: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) .add(MI.getOperand(0)) .add(MI.getOperand(1)) .addImm(isEOP(I)); // Set End of program bit break; - case AMDGPU::RAT_STORE_TYPED_eg: + case R600::RAT_STORE_TYPED_eg: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) .add(MI.getOperand(0)) .add(MI.getOperand(1)) @@ -360,49 +402,49 @@ .addImm(isEOP(I)); // Set End of program bit break; - case AMDGPU::BRANCH: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) + case R600::BRANCH: + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP)) .add(MI.getOperand(0)); break; - case AMDGPU::BRANCH_COND_f32: { + case R600::BRANCH_COND_f32: { MachineInstr *NewMI = - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), - AMDGPU::PREDICATE_BIT) + BuildMI(*BB, I, 
BB->findDebugLoc(I), TII->get(R600::PRED_X), + R600::PREDICATE_BIT) .add(MI.getOperand(1)) - .addImm(AMDGPU::PRED_SETNE) + .addImm(R600::PRED_SETNE) .addImm(0); // Flags TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) .add(MI.getOperand(0)) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + .addReg(R600::PREDICATE_BIT, RegState::Kill); break; } - case AMDGPU::BRANCH_COND_i32: { + case R600::BRANCH_COND_i32: { MachineInstr *NewMI = - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), - AMDGPU::PREDICATE_BIT) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), + R600::PREDICATE_BIT) .add(MI.getOperand(1)) - .addImm(AMDGPU::PRED_SETNE_INT) + .addImm(R600::PRED_SETNE_INT) .addImm(0); // Flags TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) .add(MI.getOperand(0)) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + .addReg(R600::PREDICATE_BIT, RegState::Kill); break; } - case AMDGPU::EG_ExportSwz: - case AMDGPU::R600_ExportSwz: { + case R600::EG_ExportSwz: + case R600::R600_ExportSwz: { // Instruction is left unmodified if its not the last one of its type bool isLastInstructionOfItsType = true; unsigned InstExportType = MI.getOperand(1).getImm(); for (MachineBasicBlock::iterator NextExportInst = std::next(I), EndBlock = BB->end(); NextExportInst != EndBlock; NextExportInst = std::next(NextExportInst)) { - if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz || - NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) { + if (NextExportInst->getOpcode() == R600::EG_ExportSwz || + NextExportInst->getOpcode() == R600::R600_ExportSwz) { unsigned CurrentInstExportType = NextExportInst->getOperand(1) .getImm(); if (CurrentInstExportType == InstExportType) { @@ -414,7 +456,7 @@ bool EOP = isEOP(I); if (!EOP && !isLastInstructionOfItsType) return BB; - unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40; + unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 
84 : 40; BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) .add(MI.getOperand(0)) .add(MI.getOperand(1)) @@ -427,7 +469,7 @@ .addImm(EOP); break; } - case AMDGPU::RETURN: { + case R600::RETURN: { return BB; } } @@ -583,23 +625,23 @@ return LowerImplicitParameter(DAG, VT, DL, 8); case Intrinsic::r600_read_tgid_x: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_X, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T1_X, VT); case Intrinsic::r600_read_tgid_y: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_Y, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T1_Y, VT); case Intrinsic::r600_read_tgid_z: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_Z, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T1_Z, VT); case Intrinsic::r600_read_tidig_x: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_X, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T0_X, VT); case Intrinsic::r600_read_tidig_y: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_Y, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T0_Y, VT); case Intrinsic::r600_read_tidig_z: - return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_Z, VT); + return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, + R600::T0_Z, VT); case Intrinsic::r600_recipsqrt_ieee: return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); @@ -1521,7 +1563,7 @@ SDValue R600TargetLowering::lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - const R600FrameLowering *TFL = getSubtarget()->getFrameLowering(); + const R600FrameLowering *TFL = Subtarget->getFrameLowering(); FrameIndexSDNode *FIN = cast(Op); @@ -1533,6 +1575,28 @@ Op.getValueType()); } +CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC, + bool IsVarArg) const { + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + case CallingConv::C: + case CallingConv::Fast: + case CallingConv::Cold: + return CC_R600_Kernel; + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_ES: + case CallingConv::AMDGPU_LS: + return CC_R600; + default: + report_fatal_error("Unsupported calling convention."); + } +} + /// XXX Only kernel functions are supported, so we can assume for now that /// every function is a kernel function, but in the future we should use /// separate calling conventions for kernel and non-kernel functions. 
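// Illustrative sketch (not from this patch): how the CCAssignFn chosen by the
// new R600TargetLowering::CCAssignFnForCall() is normally consumed. CC_R600 and
// CC_R600_Kernel are emitted into R600GenCallingConv.inc (included above); the
// helper name and its parameter list here are hypothetical.
static void analyzeR600FormalArgs(const R600TargetLowering &TLI,
                                  CallingConv::ID CallConv, bool IsVarArg,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  MachineFunction &MF, LLVMContext &Ctx,
                                  SmallVectorImpl<CCValAssign> &ArgLocs) {
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
  // For kernel calling conventions this runs the custom allocateKernArg()
  // handler, so each CCValAssign carries a kernarg byte offset rather than a
  // register assignment.
  CCInfo.AnalyzeFormalArguments(Ins, TLI.CCAssignFnForCall(CallConv, IsVarArg));
}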
@@ -1565,7 +1629,7 @@ } if (AMDGPU::isShader(CallConv)) { - unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass); SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT); InVals.push_back(Register); continue; @@ -1596,7 +1660,7 @@ unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset(); unsigned PartOffset = VA.getLocMemOffset(); - unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) + + unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset(); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); @@ -1984,26 +2048,26 @@ SDValue &Src, SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) const { - const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); + const R600InstrInfo *TII = Subtarget->getInstrInfo(); if (!Src.isMachineOpcode()) return false; switch (Src.getMachineOpcode()) { - case AMDGPU::FNEG_R600: + case R600::FNEG_R600: if (!Neg.getNode()) return false; Src = Src.getOperand(0); Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); return true; - case AMDGPU::FABS_R600: + case R600::FABS_R600: if (!Abs.getNode()) return false; Src = Src.getOperand(0); Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); return true; - case AMDGPU::CONST_COPY: { + case R600::CONST_COPY: { unsigned Opcode = ParentNode->getMachineOpcode(); - bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; + bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; if (!Sel.getNode()) return false; @@ -2014,17 +2078,17 @@ // Gather constants values int SrcIndices[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) + TII->getOperandIdx(Opcode, R600::OpName::src0), + TII->getOperandIdx(Opcode, R600::OpName::src1), + TII->getOperandIdx(Opcode, R600::OpName::src2), + TII->getOperandIdx(Opcode, R600::OpName::src0_X), + TII->getOperandIdx(Opcode, R600::OpName::src0_Y), + TII->getOperandIdx(Opcode, R600::OpName::src0_Z), + TII->getOperandIdx(Opcode, R600::OpName::src0_W), + TII->getOperandIdx(Opcode, R600::OpName::src1_X), + TII->getOperandIdx(Opcode, R600::OpName::src1_Y), + TII->getOperandIdx(Opcode, R600::OpName::src1_Z), + TII->getOperandIdx(Opcode, R600::OpName::src1_W) }; std::vector Consts; for (int OtherSrcIdx : SrcIndices) { @@ -2037,7 +2101,7 @@ } if (RegisterSDNode *Reg = dyn_cast(ParentNode->getOperand(OtherSrcIdx))) { - if (Reg->getReg() == AMDGPU::ALU_CONST) { + if (Reg->getReg() == R600::ALU_CONST) { ConstantSDNode *Cst = cast(ParentNode->getOperand(OtherSelIdx)); Consts.push_back(Cst->getZExtValue()); @@ -2052,30 +2116,30 @@ } Sel = CstOffset; - Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32); + Src = DAG.getRegister(R600::ALU_CONST, MVT::f32); return true; } - case AMDGPU::MOV_IMM_GLOBAL_ADDR: + case R600::MOV_IMM_GLOBAL_ADDR: // Check if the Imm slot is used. Taken from below. 
if (cast(Imm)->getZExtValue()) return false; Imm = Src.getOperand(0); - Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32); + Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32); return true; - case AMDGPU::MOV_IMM_I32: - case AMDGPU::MOV_IMM_F32: { - unsigned ImmReg = AMDGPU::ALU_LITERAL_X; + case R600::MOV_IMM_I32: + case R600::MOV_IMM_F32: { + unsigned ImmReg = R600::ALU_LITERAL_X; uint64_t ImmValue = 0; - if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) { + if (Src.getMachineOpcode() == R600::MOV_IMM_F32) { ConstantFPSDNode *FPC = dyn_cast(Src.getOperand(0)); float FloatValue = FPC->getValueAPF().convertToFloat(); if (FloatValue == 0.0) { - ImmReg = AMDGPU::ZERO; + ImmReg = R600::ZERO; } else if (FloatValue == 0.5) { - ImmReg = AMDGPU::HALF; + ImmReg = R600::HALF; } else if (FloatValue == 1.0) { - ImmReg = AMDGPU::ONE; + ImmReg = R600::ONE; } else { ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); } @@ -2083,9 +2147,9 @@ ConstantSDNode *C = dyn_cast(Src.getOperand(0)); uint64_t Value = C->getZExtValue(); if (Value == 0) { - ImmReg = AMDGPU::ZERO; + ImmReg = R600::ZERO; } else if (Value == 1) { - ImmReg = AMDGPU::ONE_INT; + ImmReg = R600::ONE_INT; } else { ImmValue = Value; } @@ -2094,7 +2158,7 @@ // Check that we aren't already using an immediate. // XXX: It's possible for an instruction to have more than one // immediate operand, but this is not supported yet. - if (ImmReg == AMDGPU::ALU_LITERAL_X) { + if (ImmReg == R600::ALU_LITERAL_X) { if (!Imm.getNode()) return false; ConstantSDNode *C = dyn_cast(Imm); @@ -2114,7 +2178,7 @@ /// Fold the instructions after selecting them SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, SelectionDAG &DAG) const { - const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); + const R600InstrInfo *TII = Subtarget->getInstrInfo(); if (!Node->isMachineOpcode()) return Node; @@ -2123,36 +2187,36 @@ std::vector Ops(Node->op_begin(), Node->op_end()); - if (Opcode == AMDGPU::DOT_4) { + if (Opcode == R600::DOT_4) { int OperandIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) + TII->getOperandIdx(Opcode, R600::OpName::src0_X), + TII->getOperandIdx(Opcode, R600::OpName::src0_Y), + TII->getOperandIdx(Opcode, R600::OpName::src0_Z), + TII->getOperandIdx(Opcode, R600::OpName::src0_W), + TII->getOperandIdx(Opcode, R600::OpName::src1_X), + TII->getOperandIdx(Opcode, R600::OpName::src1_Y), + TII->getOperandIdx(Opcode, R600::OpName::src1_Z), + TII->getOperandIdx(Opcode, R600::OpName::src1_W) }; int NegIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W) + TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X), + TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y), + TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z), + 
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W), + TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X), + TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y), + TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z), + TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W) }; int AbsIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W) + TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X), + TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y), + TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z), + TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W), + TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X), + TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y), + TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z), + TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W) }; for (unsigned i = 0; i < 8; i++) { if (OperandIdx[i] < 0) @@ -2160,7 +2224,7 @@ SDValue &Src = Ops[OperandIdx[i] - 1]; SDValue &Neg = Ops[NegIdx[i] - 1]; SDValue &Abs = Ops[AbsIdx[i] - 1]; - bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; + bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); if (HasDst) SelIdx--; @@ -2168,7 +2232,7 @@ if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } - } else if (Opcode == AMDGPU::REG_SEQUENCE) { + } else if (Opcode == R600::REG_SEQUENCE) { for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) { SDValue &Src = Ops[i]; if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) @@ -2178,18 +2242,18 @@ if (!TII->hasInstrModifiers(Opcode)) return Node; int OperandIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2) + TII->getOperandIdx(Opcode, R600::OpName::src0), + TII->getOperandIdx(Opcode, R600::OpName::src1), + TII->getOperandIdx(Opcode, R600::OpName::src2) }; int NegIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg) + TII->getOperandIdx(Opcode, R600::OpName::src0_neg), + TII->getOperandIdx(Opcode, R600::OpName::src1_neg), + TII->getOperandIdx(Opcode, R600::OpName::src2_neg) }; int AbsIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs), + TII->getOperandIdx(Opcode, R600::OpName::src0_abs), + TII->getOperandIdx(Opcode, R600::OpName::src1_abs), -1 }; for (unsigned i = 0; i < 3; i++) { @@ -2199,9 +2263,9 @@ SDValue &Neg = Ops[NegIdx[i] - 1]; SDValue FakeAbs; SDValue &Abs = (AbsIdx[i] > -1) ? 
Ops[AbsIdx[i] - 1] : FakeAbs; - bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; + bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); - int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal); + int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal); if (HasDst) { SelIdx--; ImmIdx--; Index: lib/Target/AMDGPU/R600InstrFormats.td =================================================================== --- lib/Target/AMDGPU/R600InstrFormats.td +++ lib/Target/AMDGPU/R600InstrFormats.td @@ -41,7 +41,7 @@ bit LDS_1A2D = 0; let SubtargetPredicate = isR600toCayman; - let Namespace = "AMDGPU"; + let Namespace = "R600"; let OutOperandList = outs; let InOperandList = ins; let AsmString = asm; Index: lib/Target/AMDGPU/R600InstrInfo.h =================================================================== --- lib/Target/AMDGPU/R600InstrInfo.h +++ lib/Target/AMDGPU/R600InstrInfo.h @@ -15,8 +15,11 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H #define LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H -#include "AMDGPUInstrInfo.h" #include "R600RegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "R600GenInstrInfo.inc" namespace llvm { @@ -34,7 +37,7 @@ class MachineInstrBuilder; class R600Subtarget; -class R600InstrInfo final : public AMDGPUInstrInfo { +class R600InstrInfo final : public R600GenInstrInfo { private: const R600RegisterInfo RI; const R600Subtarget &ST; @@ -324,7 +327,7 @@ PseudoSourceValue::PSVKind Kind) const override; }; -namespace AMDGPU { +namespace R600 { int getLDSNoRetOp(uint16_t Opcode); Index: lib/Target/AMDGPU/R600InstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/R600InstrInfo.cpp +++ lib/Target/AMDGPU/R600InstrInfo.cpp @@ -45,10 +45,15 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR -#include "AMDGPUGenDFAPacketizer.inc" +#include "R600GenDFAPacketizer.inc" + +#define GET_INSTRINFO_CTOR_DTOR +#define GET_INSTRMAP_INFO +#define GET_INSTRINFO_NAMED_OPS +#include "R600GenInstrInfo.inc" R600InstrInfo::R600InstrInfo(const R600Subtarget &ST) - : AMDGPUInstrInfo(ST), RI(), ST(ST) {} + : R600GenInstrInfo(-1, -1), RI(), ST(ST) {} bool R600InstrInfo::isVector(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; @@ -59,31 +64,31 @@ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { unsigned VectorComponents = 0; - if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) || - AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) && - (AMDGPU::R600_Reg128RegClass.contains(SrcReg) || - AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) { + if ((R600::R600_Reg128RegClass.contains(DestReg) || + R600::R600_Reg128VerticalRegClass.contains(DestReg)) && + (R600::R600_Reg128RegClass.contains(SrcReg) || + R600::R600_Reg128VerticalRegClass.contains(SrcReg))) { VectorComponents = 4; - } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) || - AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) && - (AMDGPU::R600_Reg64RegClass.contains(SrcReg) || - AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) { + } else if((R600::R600_Reg64RegClass.contains(DestReg) || + R600::R600_Reg64VerticalRegClass.contains(DestReg)) && + (R600::R600_Reg64RegClass.contains(SrcReg) || + R600::R600_Reg64VerticalRegClass.contains(SrcReg))) { VectorComponents = 2; } if (VectorComponents > 0) { for (unsigned I = 0; I < VectorComponents; I++) { unsigned SubRegIndex = 
AMDGPURegisterInfo::getSubRegFromChannel(I); - buildDefaultInstruction(MBB, MI, AMDGPU::MOV, + buildDefaultInstruction(MBB, MI, R600::MOV, RI.getSubReg(DestReg, SubRegIndex), RI.getSubReg(SrcReg, SubRegIndex)) .addReg(DestReg, RegState::Define | RegState::Implicit); } } else { - MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, + MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, R600::MOV, DestReg, SrcReg); - NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0)) + NewMI->getOperand(getOperandIdx(*NewMI, R600::OpName::src0)) .setIsKill(KillSrc); } } @@ -104,9 +109,9 @@ switch(Opcode) { default: return false; - case AMDGPU::MOV: - case AMDGPU::MOV_IMM_F32: - case AMDGPU::MOV_IMM_I32: + case R600::MOV: + case R600::MOV_IMM_F32: + case R600::MOV_IMM_I32: return true; } } @@ -118,10 +123,10 @@ bool R600InstrInfo::isCubeOp(unsigned Opcode) const { switch(Opcode) { default: return false; - case AMDGPU::CUBE_r600_pseudo: - case AMDGPU::CUBE_r600_real: - case AMDGPU::CUBE_eg_pseudo: - case AMDGPU::CUBE_eg_real: + case R600::CUBE_r600_pseudo: + case R600::CUBE_r600_real: + case R600::CUBE_eg_pseudo: + case R600::CUBE_eg_real: return true; } } @@ -149,7 +154,7 @@ } bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const { - return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1; + return isLDSInstr(Opcode) && getOperandIdx(Opcode, R600::OpName::dst) != -1; } bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const { @@ -158,12 +163,12 @@ if (isVector(MI) || isCubeOp(MI.getOpcode())) return true; switch (MI.getOpcode()) { - case AMDGPU::PRED_X: - case AMDGPU::INTERP_PAIR_XY: - case AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::COPY: - case AMDGPU::DOT_4: + case R600::PRED_X: + case R600::INTERP_PAIR_XY: + case R600::INTERP_PAIR_ZW: + case R600::INTERP_VEC_LOAD: + case R600::COPY: + case R600::DOT_4: return true; default: return false; @@ -173,7 +178,7 @@ bool R600InstrInfo::isTransOnly(unsigned Opcode) const { if (ST.hasCaymanISA()) return false; - return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); + return (get(Opcode).getSchedClass() == R600::Sched::TransALU); } bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const { @@ -181,7 +186,7 @@ } bool R600InstrInfo::isVectorOnly(unsigned Opcode) const { - return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); + return (get(Opcode).getSchedClass() == R600::Sched::VecALU); } bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const { @@ -215,8 +220,8 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { switch (Opcode) { - case AMDGPU::KILLGT: - case AMDGPU::GROUP_BARRIER: + case R600::KILLGT: + case R600::GROUP_BARRIER: return true; default: return false; @@ -224,11 +229,11 @@ } bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const { - return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; + return MI.findRegisterUseOperandIdx(R600::AR_X) != -1; } bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const { - return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; + return MI.findRegisterDefOperandIdx(R600::AR_X) != -1; } bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const { @@ -242,7 +247,7 @@ TargetRegisterInfo::isVirtualRegister(I->getReg())) continue; - if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())) + if (R600::R600_LDS_SRC_REGRegClass.contains(I->getReg())) return true; } return false; @@ -250,17 +255,17 @@ int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) 
const { static const unsigned SrcSelTable[][2] = { - {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, - {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, - {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, - {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, - {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, - {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, - {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, - {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, - {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, - {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, - {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W} + {R600::OpName::src0, R600::OpName::src0_sel}, + {R600::OpName::src1, R600::OpName::src1_sel}, + {R600::OpName::src2, R600::OpName::src2_sel}, + {R600::OpName::src0_X, R600::OpName::src0_sel_X}, + {R600::OpName::src0_Y, R600::OpName::src0_sel_Y}, + {R600::OpName::src0_Z, R600::OpName::src0_sel_Z}, + {R600::OpName::src0_W, R600::OpName::src0_sel_W}, + {R600::OpName::src1_X, R600::OpName::src1_sel_X}, + {R600::OpName::src1_Y, R600::OpName::src1_sel_Y}, + {R600::OpName::src1_Z, R600::OpName::src1_sel_Z}, + {R600::OpName::src1_W, R600::OpName::src1_sel_W} }; for (const auto &Row : SrcSelTable) { @@ -275,23 +280,23 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const { SmallVector, 3> Result; - if (MI.getOpcode() == AMDGPU::DOT_4) { + if (MI.getOpcode() == R600::DOT_4) { static const unsigned OpTable[8][2] = { - {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, - {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, - {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, - {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, - {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, - {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, - {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, - {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}, + {R600::OpName::src0_X, R600::OpName::src0_sel_X}, + {R600::OpName::src0_Y, R600::OpName::src0_sel_Y}, + {R600::OpName::src0_Z, R600::OpName::src0_sel_Z}, + {R600::OpName::src0_W, R600::OpName::src0_sel_W}, + {R600::OpName::src1_X, R600::OpName::src1_sel_X}, + {R600::OpName::src1_Y, R600::OpName::src1_sel_Y}, + {R600::OpName::src1_Z, R600::OpName::src1_sel_Z}, + {R600::OpName::src1_W, R600::OpName::src1_sel_W}, }; for (unsigned j = 0; j < 8; j++) { MachineOperand &MO = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0])); unsigned Reg = MO.getReg(); - if (Reg == AMDGPU::ALU_CONST) { + if (Reg == R600::ALU_CONST) { MachineOperand &Sel = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); Result.push_back(std::make_pair(&MO, Sel.getImm())); @@ -303,9 +308,9 @@ } static const unsigned OpTable[3][2] = { - {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, - {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, - {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, + {R600::OpName::src0, R600::OpName::src0_sel}, + {R600::OpName::src1, R600::OpName::src1_sel}, + {R600::OpName::src2, R600::OpName::src2_sel}, }; for (unsigned j = 0; j < 3; j++) { @@ -314,15 +319,15 @@ break; MachineOperand &MO = MI.getOperand(SrcIdx); unsigned Reg = MO.getReg(); - if (Reg == AMDGPU::ALU_CONST) { + if (Reg == R600::ALU_CONST) { MachineOperand &Sel = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); Result.push_back(std::make_pair(&MO, Sel.getImm())); continue; } - if (Reg == AMDGPU::ALU_LITERAL_X) { + if (Reg == R600::ALU_LITERAL_X) { MachineOperand &Operand = - MI.getOperand(getOperandIdx(MI.getOpcode(), 
AMDGPU::OpName::literal)); + MI.getOperand(getOperandIdx(MI.getOpcode(), R600::OpName::literal)); if (Operand.isImm()) { Result.push_back(std::make_pair(&MO, Operand.getImm())); continue; @@ -346,7 +351,7 @@ ++i; unsigned Reg = Src.first->getReg(); int Index = RI.getEncodingValue(Reg) & 0xff; - if (Reg == AMDGPU::OQAP) { + if (Reg == R600::OQAP) { Result.push_back(std::make_pair(Index, 0U)); } if (PV.find(Reg) != PV.end()) { @@ -436,7 +441,7 @@ const std::pair &Src = Srcs[j]; if (Src.first < 0 || Src.first == 255) continue; - if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) { + if (Src.first == GET_REG_INDEX(RI.getEncodingValue(R600::OQAP))) { if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 && Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) { // The value from output queue A (denoted by register OQAP) can @@ -542,7 +547,7 @@ for (unsigned i = 0, e = IG.size(); i < e; ++i) { IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount)); unsigned Op = getOperandIdx(IG[i]->getOpcode(), - AMDGPU::OpName::bank_swizzle); + R600::OpName::bank_swizzle); ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) IG[i]->getOperand(Op).getImm()); } @@ -611,14 +616,14 @@ continue; for (const auto &Src : getSrcs(MI)) { - if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X) + if (Src.first->getReg() == R600::ALU_LITERAL_X) Literals.insert(Src.second); if (Literals.size() > 4) return false; - if (Src.first->getReg() == AMDGPU::ALU_CONST) + if (Src.first->getReg() == R600::ALU_CONST) Consts.push_back(Src.second); - if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) || - AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) { + if (R600::R600_KC0RegClass.contains(Src.first->getReg()) || + R600::R600_KC1RegClass.contains(Src.first->getReg())) { unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff; unsigned Chan = RI.getHWRegChan(Src.first->getReg()); Consts.push_back((Index << 2) | Chan); @@ -637,7 +642,7 @@ static bool isPredicateSetter(unsigned Opcode) { switch (Opcode) { - case AMDGPU::PRED_X: + case R600::PRED_X: return true; default: return false; @@ -659,12 +664,12 @@ static bool isJump(unsigned Opcode) { - return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND; + return Opcode == R600::JUMP || Opcode == R600::JUMP_COND; } static bool isBranch(unsigned Opcode) { - return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 || - Opcode == AMDGPU::BRANCH_COND_f32; + return Opcode == R600::BRANCH || Opcode == R600::BRANCH_COND_i32 || + Opcode == R600::BRANCH_COND_f32; } bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB, @@ -679,7 +684,7 @@ if (I == MBB.end()) return false; - // AMDGPU::BRANCH* instructions are only available after isel and are not + // R600::BRANCH* instructions are only available after isel and are not // handled if (isBranch(I->getOpcode())) return true; @@ -688,7 +693,7 @@ } // Remove successive JUMP - while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) { + while (I != MBB.begin() && std::prev(I)->getOpcode() == R600::JUMP) { MachineBasicBlock::iterator PriorI = std::prev(I); if (AllowModify) I->removeFromParent(); @@ -699,10 +704,10 @@ // If there is only one terminator instruction, process it. 
unsigned LastOpc = LastInst.getOpcode(); if (I == MBB.begin() || !isJump((--I)->getOpcode())) { - if (LastOpc == AMDGPU::JUMP) { + if (LastOpc == R600::JUMP) { TBB = LastInst.getOperand(0).getMBB(); return false; - } else if (LastOpc == AMDGPU::JUMP_COND) { + } else if (LastOpc == R600::JUMP_COND) { auto predSet = I; while (!isPredicateSetter(predSet->getOpcode())) { predSet = --I; @@ -710,7 +715,7 @@ TBB = LastInst.getOperand(0).getMBB(); Cond.push_back(predSet->getOperand(1)); Cond.push_back(predSet->getOperand(2)); - Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); + Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false)); return false; } return true; // Can't handle indirect branch. @@ -721,7 +726,7 @@ unsigned SecondLastOpc = SecondLastInst.getOpcode(); // If the block ends with a B and a Bcc, handle it. - if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) { + if (SecondLastOpc == R600::JUMP_COND && LastOpc == R600::JUMP) { auto predSet = --I; while (!isPredicateSetter(predSet->getOpcode())) { predSet = --I; @@ -730,7 +735,7 @@ FBB = LastInst.getOperand(0).getMBB(); Cond.push_back(predSet->getOperand(1)); Cond.push_back(predSet->getOperand(2)); - Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); + Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false)); return false; } @@ -742,8 +747,8 @@ MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) { for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend(); It != E; ++It) { - if (It->getOpcode() == AMDGPU::CF_ALU || - It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) + if (It->getOpcode() == R600::CF_ALU || + It->getOpcode() == R600::CF_ALU_PUSH_BEFORE) return It.getReverse(); } return MBB.end(); @@ -760,7 +765,7 @@ if (!FBB) { if (Cond.empty()) { - BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB); + BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(TBB); return 1; } else { MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); @@ -768,14 +773,14 @@ addFlag(*PredSet, 0, MO_FLAG_PUSH); PredSet->getOperand(2).setImm(Cond[1].getImm()); - BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) + BuildMI(&MBB, DL, get(R600::JUMP_COND)) .addMBB(TBB) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + .addReg(R600::PREDICATE_BIT, RegState::Kill); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) return 1; - assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); - CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); + assert (CfAlu->getOpcode() == R600::CF_ALU); + CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE)); return 1; } } else { @@ -783,15 +788,15 @@ assert(PredSet && "No previous predicate !"); addFlag(*PredSet, 0, MO_FLAG_PUSH); PredSet->getOperand(2).setImm(Cond[1].getImm()); - BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) + BuildMI(&MBB, DL, get(R600::JUMP_COND)) .addMBB(TBB) - .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); - BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB); + .addReg(R600::PREDICATE_BIT, RegState::Kill); + BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(FBB); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) return 2; - assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); - CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); + assert (CfAlu->getOpcode() == R600::CF_ALU); + CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE)); return 2; } } @@ -812,18 +817,18 @@ switch (I->getOpcode()) { default: return 0; - case AMDGPU::JUMP_COND: { + case R600::JUMP_COND: { MachineInstr *predSet = 
findFirstPredicateSetterFrom(MBB, I); clearFlag(*predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) break; - assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); - CfAlu->setDesc(get(AMDGPU::CF_ALU)); + assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE); + CfAlu->setDesc(get(R600::CF_ALU)); break; } - case AMDGPU::JUMP: + case R600::JUMP: I->eraseFromParent(); break; } @@ -837,18 +842,18 @@ // FIXME: only one case?? default: return 1; - case AMDGPU::JUMP_COND: { + case R600::JUMP_COND: { MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); clearFlag(*predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) break; - assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); - CfAlu->setDesc(get(AMDGPU::CF_ALU)); + assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE); + CfAlu->setDesc(get(R600::CF_ALU)); break; } - case AMDGPU::JUMP: + case R600::JUMP: I->eraseFromParent(); break; } @@ -863,9 +868,9 @@ unsigned Reg = MI.getOperand(idx).getReg(); switch (Reg) { default: return false; - case AMDGPU::PRED_SEL_ONE: - case AMDGPU::PRED_SEL_ZERO: - case AMDGPU::PREDICATE_BIT: + case R600::PRED_SEL_ONE: + case R600::PRED_SEL_ZERO: + case R600::PREDICATE_BIT: return true; } } @@ -876,9 +881,9 @@ // be predicated. Until we have proper support for instruction clauses in the // backend, we will mark KILL* instructions as unpredicable. - if (MI.getOpcode() == AMDGPU::KILLGT) { + if (MI.getOpcode() == R600::KILLGT) { return false; - } else if (MI.getOpcode() == AMDGPU::CF_ALU) { + } else if (MI.getOpcode() == R600::CF_ALU) { // If the clause start in the middle of MBB then the MBB has more // than a single clause, unable to predicate several clauses. 
if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI)) @@ -888,7 +893,7 @@ } else if (isVector(MI)) { return false; } else { - return AMDGPUInstrInfo::isPredicable(MI); + return TargetInstrInfo::isPredicable(MI); } } @@ -929,17 +934,17 @@ R600InstrInfo::reverseBranchCondition(SmallVectorImpl &Cond) const { MachineOperand &MO = Cond[1]; switch (MO.getImm()) { - case AMDGPU::PRED_SETE_INT: - MO.setImm(AMDGPU::PRED_SETNE_INT); + case R600::PRED_SETE_INT: + MO.setImm(R600::PRED_SETNE_INT); break; - case AMDGPU::PRED_SETNE_INT: - MO.setImm(AMDGPU::PRED_SETE_INT); + case R600::PRED_SETNE_INT: + MO.setImm(R600::PRED_SETE_INT); break; - case AMDGPU::PRED_SETE: - MO.setImm(AMDGPU::PRED_SETNE); + case R600::PRED_SETE: + MO.setImm(R600::PRED_SETNE); break; - case AMDGPU::PRED_SETNE: - MO.setImm(AMDGPU::PRED_SETE); + case R600::PRED_SETNE: + MO.setImm(R600::PRED_SETE); break; default: return true; @@ -947,11 +952,11 @@ MachineOperand &MO2 = Cond[2]; switch (MO2.getReg()) { - case AMDGPU::PRED_SEL_ZERO: - MO2.setReg(AMDGPU::PRED_SEL_ONE); + case R600::PRED_SEL_ZERO: + MO2.setReg(R600::PRED_SEL_ONE); break; - case AMDGPU::PRED_SEL_ONE: - MO2.setReg(AMDGPU::PRED_SEL_ZERO); + case R600::PRED_SEL_ONE: + MO2.setReg(R600::PRED_SEL_ZERO); break; default: return true; @@ -968,22 +973,22 @@ ArrayRef Pred) const { int PIdx = MI.findFirstPredOperandIdx(); - if (MI.getOpcode() == AMDGPU::CF_ALU) { + if (MI.getOpcode() == R600::CF_ALU) { MI.getOperand(8).setImm(0); return true; } - if (MI.getOpcode() == AMDGPU::DOT_4) { - MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X)) + if (MI.getOpcode() == R600::DOT_4) { + MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_X)) .setReg(Pred[2].getReg()); - MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y)) + MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Y)) .setReg(Pred[2].getReg()); - MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z)) + MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Z)) .setReg(Pred[2].getReg()); - MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W)) + MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_W)) .setReg(Pred[2].getReg()); MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); - MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); + MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit); return true; } @@ -991,7 +996,7 @@ MachineOperand &PMO = MI.getOperand(PIdx); PMO.setReg(Pred[2].getReg()); MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); - MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); + MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit); return true; } @@ -1021,20 +1026,20 @@ default: { MachineBasicBlock *MBB = MI.getParent(); int OffsetOpIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr); + R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::addr); // addr is a custom operand with multiple MI operands, and only the // first MI operand is given a name. 
int RegOpIdx = OffsetOpIdx + 1; int ChanOpIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan); + R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::chan); if (isRegisterLoad(MI)) { int DstOpIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); + R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::dst); unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); - if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { + if (OffsetReg == R600::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(), getIndirectAddrRegClass()->getRegister(Address)); } else { @@ -1043,12 +1048,12 @@ } } else if (isRegisterStore(MI)) { int ValOpIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val); + R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::val); unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); - if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { + if (OffsetReg == R600::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address), MI.getOperand(ValOpIdx).getReg()); } else { @@ -1063,15 +1068,15 @@ MBB->erase(MI); return true; } - case AMDGPU::R600_EXTRACT_ELT_V2: - case AMDGPU::R600_EXTRACT_ELT_V4: + case R600::R600_EXTRACT_ELT_V2: + case R600::R600_EXTRACT_ELT_V4: buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(), RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address MI.getOperand(2).getReg(), RI.getHWRegChan(MI.getOperand(1).getReg())); break; - case AMDGPU::R600_INSERT_ELT_V2: - case AMDGPU::R600_INSERT_ELT_V4: + case R600::R600_INSERT_ELT_V2: + case R600::R600_INSERT_ELT_V4: buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address MI.getOperand(3).getReg(), // Offset @@ -1096,14 +1101,14 @@ for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) { for (unsigned Chan = 0; Chan < StackWidth; ++Chan) { - unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan); + unsigned Reg = R600::R600_TReg32RegClass.getRegister((4 * Index) + Chan); TRI.reserveRegisterTuples(Reserved, Reg); } } } const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const { - return &AMDGPU::R600_TReg32_XRegClass; + return &R600::R600_TReg32_XRegClass; } MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, @@ -1121,20 +1126,20 @@ unsigned AddrReg; switch (AddrChan) { default: llvm_unreachable("Invalid Channel"); - case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; - case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; - case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; - case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; + case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break; + case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break; + case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break; + case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break; } - MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, - AMDGPU::AR_X, OffsetReg); - 
setImmOperand(*MOVA, AMDGPU::OpName::write, 0); + MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg, + R600::AR_X, OffsetReg); + setImmOperand(*MOVA, R600::OpName::write, 0); - MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, + MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV, AddrReg, ValueReg) - .addReg(AMDGPU::AR_X, + .addReg(R600::AR_X, RegState::Implicit | RegState::Kill); - setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1); + setImmOperand(*Mov, R600::OpName::dst_rel, 1); return Mov; } @@ -1153,21 +1158,21 @@ unsigned AddrReg; switch (AddrChan) { default: llvm_unreachable("Invalid Channel"); - case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; - case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; - case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; - case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; + case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break; + case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break; + case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break; + case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break; } - MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, - AMDGPU::AR_X, + MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg, + R600::AR_X, OffsetReg); - setImmOperand(*MOVA, AMDGPU::OpName::write, 0); - MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, + setImmOperand(*MOVA, R600::OpName::write, 0); + MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV, ValueReg, AddrReg) - .addReg(AMDGPU::AR_X, + .addReg(R600::AR_X, RegState::Implicit | RegState::Kill); - setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1); + setImmOperand(*Mov, R600::OpName::src0_rel, 1); return Mov; } @@ -1265,7 +1270,7 @@ //XXX: The r600g finalizer expects this to be 1, once we've moved the //scheduling to the backend, we can change the default to 0. 
MIB.addImm(1) // $last - .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel + .addReg(R600::PRED_SEL_OFF) // $pred_sel .addImm(0) // $literal .addImm(0); // $bank_swizzle @@ -1286,23 +1291,23 @@ static unsigned getSlotedOps(unsigned Op, unsigned Slot) { switch (Op) { - OPERAND_CASE(AMDGPU::OpName::update_exec_mask) - OPERAND_CASE(AMDGPU::OpName::update_pred) - OPERAND_CASE(AMDGPU::OpName::write) - OPERAND_CASE(AMDGPU::OpName::omod) - OPERAND_CASE(AMDGPU::OpName::dst_rel) - OPERAND_CASE(AMDGPU::OpName::clamp) - OPERAND_CASE(AMDGPU::OpName::src0) - OPERAND_CASE(AMDGPU::OpName::src0_neg) - OPERAND_CASE(AMDGPU::OpName::src0_rel) - OPERAND_CASE(AMDGPU::OpName::src0_abs) - OPERAND_CASE(AMDGPU::OpName::src0_sel) - OPERAND_CASE(AMDGPU::OpName::src1) - OPERAND_CASE(AMDGPU::OpName::src1_neg) - OPERAND_CASE(AMDGPU::OpName::src1_rel) - OPERAND_CASE(AMDGPU::OpName::src1_abs) - OPERAND_CASE(AMDGPU::OpName::src1_sel) - OPERAND_CASE(AMDGPU::OpName::pred_sel) + OPERAND_CASE(R600::OpName::update_exec_mask) + OPERAND_CASE(R600::OpName::update_pred) + OPERAND_CASE(R600::OpName::write) + OPERAND_CASE(R600::OpName::omod) + OPERAND_CASE(R600::OpName::dst_rel) + OPERAND_CASE(R600::OpName::clamp) + OPERAND_CASE(R600::OpName::src0) + OPERAND_CASE(R600::OpName::src0_neg) + OPERAND_CASE(R600::OpName::src0_rel) + OPERAND_CASE(R600::OpName::src0_abs) + OPERAND_CASE(R600::OpName::src0_sel) + OPERAND_CASE(R600::OpName::src1) + OPERAND_CASE(R600::OpName::src1_neg) + OPERAND_CASE(R600::OpName::src1_rel) + OPERAND_CASE(R600::OpName::src1_abs) + OPERAND_CASE(R600::OpName::src1_sel) + OPERAND_CASE(R600::OpName::pred_sel) default: llvm_unreachable("Wrong Operand"); } @@ -1313,39 +1318,39 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg) const { - assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented"); + assert (MI->getOpcode() == R600::DOT_4 && "Not Implemented"); unsigned Opcode; if (ST.getGeneration() <= R600Subtarget::R700) - Opcode = AMDGPU::DOT4_r600; + Opcode = R600::DOT4_r600; else - Opcode = AMDGPU::DOT4_eg; + Opcode = R600::DOT4_eg; MachineBasicBlock::iterator I = MI; MachineOperand &Src0 = MI->getOperand( - getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot))); + getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src0, Slot))); MachineOperand &Src1 = MI->getOperand( - getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot))); + getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src1, Slot))); MachineInstr *MIB = buildDefaultInstruction( MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg()); static const unsigned Operands[14] = { - AMDGPU::OpName::update_exec_mask, - AMDGPU::OpName::update_pred, - AMDGPU::OpName::write, - AMDGPU::OpName::omod, - AMDGPU::OpName::dst_rel, - AMDGPU::OpName::clamp, - AMDGPU::OpName::src0_neg, - AMDGPU::OpName::src0_rel, - AMDGPU::OpName::src0_abs, - AMDGPU::OpName::src0_sel, - AMDGPU::OpName::src1_neg, - AMDGPU::OpName::src1_rel, - AMDGPU::OpName::src1_abs, - AMDGPU::OpName::src1_sel, + R600::OpName::update_exec_mask, + R600::OpName::update_pred, + R600::OpName::write, + R600::OpName::omod, + R600::OpName::dst_rel, + R600::OpName::clamp, + R600::OpName::src0_neg, + R600::OpName::src0_rel, + R600::OpName::src0_abs, + R600::OpName::src0_sel, + R600::OpName::src1_neg, + R600::OpName::src1_rel, + R600::OpName::src1_abs, + R600::OpName::src1_sel, }; MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), - getSlotedOps(AMDGPU::OpName::pred_sel, 
Slot))); - MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel)) + getSlotedOps(R600::OpName::pred_sel, Slot))); + MIB->getOperand(getOperandIdx(Opcode, R600::OpName::pred_sel)) .setReg(MO.getReg()); for (unsigned i = 0; i < 14; i++) { @@ -1362,16 +1367,16 @@ MachineBasicBlock::iterator I, unsigned DstReg, uint64_t Imm) const { - MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg, - AMDGPU::ALU_LITERAL_X); - setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm); + MachineInstr *MovImm = buildDefaultInstruction(BB, I, R600::MOV, DstReg, + R600::ALU_LITERAL_X); + setImmOperand(*MovImm, R600::OpName::literal, Imm); return MovImm; } MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, unsigned DstReg, unsigned SrcReg) const { - return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg); + return buildDefaultInstruction(*MBB, I, R600::MOV, DstReg, SrcReg); } int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const { @@ -1379,7 +1384,7 @@ } int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const { - return AMDGPU::getNamedOperandIdx(Opcode, Op); + return R600::getNamedOperandIdx(Opcode, Op); } void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op, @@ -1406,25 +1411,25 @@ bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3; switch (Flag) { case MO_FLAG_CLAMP: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp); + FlagIndex = getOperandIdx(MI, R600::OpName::clamp); break; case MO_FLAG_MASK: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write); + FlagIndex = getOperandIdx(MI, R600::OpName::write); break; case MO_FLAG_NOT_LAST: case MO_FLAG_LAST: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last); + FlagIndex = getOperandIdx(MI, R600::OpName::last); break; case MO_FLAG_NEG: switch (SrcIdx) { case 0: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg); + FlagIndex = getOperandIdx(MI, R600::OpName::src0_neg); break; case 1: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg); + FlagIndex = getOperandIdx(MI, R600::OpName::src1_neg); break; case 2: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg); + FlagIndex = getOperandIdx(MI, R600::OpName::src2_neg); break; } break; @@ -1435,10 +1440,10 @@ (void)IsOP3; switch (SrcIdx) { case 0: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs); + FlagIndex = getOperandIdx(MI, R600::OpName::src0_abs); break; case 1: - FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs); + FlagIndex = getOperandIdx(MI, R600::OpName::src1_abs); break; } break; @@ -1499,15 +1504,15 @@ switch (Kind) { case PseudoSourceValue::Stack: case PseudoSourceValue::FixedStack: - return AMDGPUASI.PRIVATE_ADDRESS; + return ST.getAMDGPUAS().PRIVATE_ADDRESS; case PseudoSourceValue::ConstantPool: case PseudoSourceValue::GOT: case PseudoSourceValue::JumpTable: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: case PseudoSourceValue::TargetCustom: - return AMDGPUASI.CONSTANT_ADDRESS; + return ST.getAMDGPUAS().CONSTANT_ADDRESS; } llvm_unreachable("Invalid pseudo source kind"); - return AMDGPUASI.PRIVATE_ADDRESS; + return ST.getAMDGPUAS().PRIVATE_ADDRESS; } Index: lib/Target/AMDGPU/R600Instructions.td =================================================================== --- lib/Target/AMDGPU/R600Instructions.td +++ lib/Target/AMDGPU/R600Instructions.td @@ -18,13 +18,13 @@ class R600WrapperInst pattern = []> : AMDGPUInst, PredicateControl { let SubtargetPredicate = isR600toCayman; 
+ let Namespace = "R600"; } class InstR600ISA pattern = []> : InstR600 { - let Namespace = "AMDGPU"; } def MEMxi : Operand { @@ -86,6 +86,12 @@ def R600_Pred : PredicateOperand; +let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, + usesCustomInserter = 1, Namespace = "R600" in { + def RETURN : ILFormat<(outs), (ins variable_ops), + "RETURN", [(AMDGPUendpgm)] + >; +} let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { @@ -219,34 +225,6 @@ } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 -def TEX_SHADOW : PatLeaf< - (imm), - [{uint32_t TType = (uint32_t)N->getZExtValue(); - return (TType >= 6 && TType <= 8) || TType == 13; - }] ->; - -def TEX_RECT : PatLeaf< - (imm), - [{uint32_t TType = (uint32_t)N->getZExtValue(); - return TType == 5; - }] ->; - -def TEX_ARRAY : PatLeaf< - (imm), - [{uint32_t TType = (uint32_t)N->getZExtValue(); - return TType == 9 || TType == 10 || TType == 16; - }] ->; - -def TEX_SHADOW_ARRAY : PatLeaf< - (imm), - [{uint32_t TType = (uint32_t)N->getZExtValue(); - return TType == 11 || TType == 12 || TType == 17; - }] ->; - class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask, dag outs, dag ins, string asm, list pattern> : InstR600ISA , @@ -357,6 +335,8 @@ // R600 SDNodes //===----------------------------------------------------------------------===// +let Namespace = "R600" in { + def INTERP_PAIR_XY : AMDGPUShaderInst < (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1), (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2), @@ -369,6 +349,8 @@ "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1", []>; +} + def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, [SDNPVariadic] @@ -416,11 +398,15 @@ // Interpolation Instructions //===----------------------------------------------------------------------===// +let Namespace = "R600" in { + def INTERP_VEC_LOAD : AMDGPUShaderInst < (outs R600_Reg128:$dst), (ins i32imm:$src0), "INTERP_LOAD $src0 : $dst">; +} + def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; } @@ -660,7 +646,7 @@ let isCodeGenOnly = 1, isPseudo = 1 in { -let usesCustomInserter = 1 in { +let Namespace = "R600", usesCustomInserter = 1 in { class FABS : AMDGPUShaderInst < (outs rc:$dst), @@ -792,7 +778,9 @@ (ins immType:$imm), "", [] ->; +> { + let Namespace = "R600"; +} } // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 @@ -1007,7 +995,7 @@ } -let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { +let isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600" in { class R600_VEC2OP pattern> : InstR600 <(outs R600_Reg32:$dst), (ins // Slot X UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X, @@ -1326,7 +1314,9 @@ // Regist loads and stores - for indirect addressing //===----------------------------------------------------------------------===// +let Namespace = "R600" in { defm R600_ : RegisterLoadStore ; +} // Hardcode channel to 0 // NOTE: LSHR is not available here. 
LSHR is per family instruction @@ -1378,11 +1368,12 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in { -def MASK_WRITE : AMDGPUShaderInst < +def MASK_WRITE : InstR600 < (outs), (ins R600_Reg32:$src), "MASK_WRITE $src", - [] + [], + NullALU >; } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 @@ -1413,7 +1404,7 @@ // Constant Buffer Addressing Support //===----------------------------------------------------------------------===// -let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { +let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600" in { def CONST_COPY : Instruction { let OutOperandList = (outs R600_Reg32:$dst); let InOperandList = (ins i32imm:$src); @@ -1536,23 +1527,6 @@ //===---------------------------------------------------------------------===// // Flow and Program control Instructions //===---------------------------------------------------------------------===// -class ILFormat pattern> -: Instruction { - - let Namespace = "AMDGPU"; - dag OutOperandList = outs; - dag InOperandList = ins; - let Pattern = pattern; - let AsmString = !strconcat(asmstr, "\n"); - let isPseudo = 1; - let Itinerary = NullALU; - bit hasIEEEFlag = 0; - bit hasZeroOpFlag = 0; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let isCodeGenOnly = 1; -} multiclass BranchConditional { def _i32 : ILFormat<(outs), @@ -1584,23 +1558,14 @@ // Custom Inserter for Branches and returns, this eventually will be a // separate pass //===---------------------------------------------------------------------===// -let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { +let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1, + Namespace = "R600" in { def BRANCH : ILFormat<(outs), (ins brtarget:$target), "; Pseudo unconditional branch instruction", [(br bb:$target)]>; defm BRANCH_COND : BranchConditional; } -//===---------------------------------------------------------------------===// -// Return instruction -//===---------------------------------------------------------------------===// -let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, - usesCustomInserter = 1 in { - def RETURN : ILFormat<(outs), (ins variable_ops), - "RETURN", [(AMDGPUendpgm)] - >; -} - //===----------------------------------------------------------------------===// // Branch Instructions //===----------------------------------------------------------------------===// @@ -1731,7 +1696,7 @@ // KIL Patterns def KIL : R600Pat < - (int_AMDGPU_kill f32:$src0), + (int_r600_kill f32:$src0), (MASK_WRITE (KILLGT (f32 ZERO), $src0)) >; Index: lib/Target/AMDGPU/R600MachineScheduler.cpp =================================================================== --- lib/Target/AMDGPU/R600MachineScheduler.cpp +++ lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -162,7 +162,7 @@ for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(), E = SU->getInstr()->operands_end(); It != E; ++It) { MachineOperand &MO = *It; - if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) + if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X) ++CurEmitted; } } @@ -181,7 +181,7 @@ static bool isPhysicalRegCopy(MachineInstr *MI) { - if (MI->getOpcode() != AMDGPU::COPY) + if (MI->getOpcode() != R600::COPY) return false; return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()); @@ -224,14 +224,14 @@ return AluTrans; switch (MI->getOpcode()) { - case AMDGPU::PRED_X: + case R600::PRED_X: return AluPredX; - case AMDGPU::INTERP_PAIR_XY: - case 
AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::DOT_4: + case R600::INTERP_PAIR_XY: + case R600::INTERP_PAIR_ZW: + case R600::INTERP_VEC_LOAD: + case R600::DOT_4: return AluT_XYZW; - case AMDGPU::COPY: + case R600::COPY: if (MI->getOperand(1).isUndef()) { // MI will become a KILL, don't consider it in scheduling return AluDiscarded; } @@ -246,7 +246,7 @@ if(TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()) || TII->isReductionOp(MI->getOpcode()) || - MI->getOpcode() == AMDGPU::GROUP_BARRIER) { + MI->getOpcode() == R600::GROUP_BARRIER) { return AluT_XYZW; } @@ -257,13 +257,13 @@ // Is the result already assigned to a channel? unsigned DestSubReg = MI->getOperand(0).getSubReg(); switch (DestSubReg) { - case AMDGPU::sub0: + case R600::sub0: return AluT_X; - case AMDGPU::sub1: + case R600::sub1: return AluT_Y; - case AMDGPU::sub2: + case R600::sub2: return AluT_Z; - case AMDGPU::sub3: + case R600::sub3: return AluT_W; default: break; @@ -271,16 +271,16 @@ // Is the result already a member of an X/Y/Z/W class? unsigned DestReg = MI->getOperand(0).getReg(); - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) || - regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass)) + if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) || + regBelongsToClass(DestReg, &R600::R600_AddrRegClass)) return AluT_X; - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass)) + if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass)) return AluT_Y; - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass)) + if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass)) return AluT_Z; - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass)) + if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass)) return AluT_W; - if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) + if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass)) return AluT_XYZW; // LDS src registers cannot be used in the Trans slot.
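The getAluKind changes above come down to a fixed mapping from a result's channel (X/Y/Z/W, or a full 128-bit vector) to an ALU slot. A minimal standalone sketch of that mapping follows; AluKind, slotForChannel, and main are illustrative stand-ins written for this note, not LLVM APIs and not part of the patch.

```cpp
// Standalone sketch (illustration only): models the channel-to-slot mapping
// used by the scheduler hunks above.
#include <cassert>
#include <cstdio>

enum AluKind { AluT_X, AluT_Y, AluT_Z, AluT_W, AluT_XYZW };

// Hypothetical helper: channels 0..3 correspond to the X/Y/Z/W slots; a
// 128-bit vector result occupies all four slots at once.
static AluKind slotForChannel(unsigned Chan, bool IsVector) {
  if (IsVector)
    return AluT_XYZW;
  switch (Chan) {
  case 0: return AluT_X;
  case 1: return AluT_Y;
  case 2: return AluT_Z;
  case 3: return AluT_W;
  }
  assert(false && "invalid channel");
  return AluT_X;
}

int main() {
  std::printf("chan 2 -> %d\n", slotForChannel(2, false)); // prints 2 (AluT_Z)
  std::printf("vector -> %d\n", slotForChannel(0, true));  // prints 4 (AluT_XYZW)
}
```

The real pass additionally falls back to register-class membership when no subregister index is present, as the hunk above shows.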
@@ -301,13 +301,13 @@ } switch (Opcode) { - case AMDGPU::PRED_X: - case AMDGPU::COPY: - case AMDGPU::CONST_COPY: - case AMDGPU::INTERP_PAIR_XY: - case AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::DOT_4: + case R600::PRED_X: + case R600::COPY: + case R600::CONST_COPY: + case R600::INTERP_PAIR_XY: + case R600::INTERP_PAIR_ZW: + case R600::INTERP_VEC_LOAD: + case R600::DOT_4: return IDAlu; default: return IDOther; @@ -353,7 +353,7 @@ } void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { - int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); + int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst); if (DstIndex == -1) { return; } @@ -370,16 +370,16 @@ // Constrains the regclass of DestReg to assign it to Slot switch (Slot) { case 0: - MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass); + MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass); break; case 1: - MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass); + MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass); break; case 2: - MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass); + MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass); break; case 3: - MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass); + MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass); break; } } Index: lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp =================================================================== --- lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -79,7 +79,7 @@ std::vector UndefReg; RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { - assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE); + assert(MI->getOpcode() == R600::REG_SEQUENCE); for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { MachineOperand &MO = Instr->getOperand(i); unsigned Chan = Instr->getOperand(i + 1).getImm(); @@ -159,8 +159,8 @@ if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) return true; switch (MI.getOpcode()) { - case AMDGPU::R600_ExportSwz: - case AMDGPU::EG_ExportSwz: + case R600::R600_ExportSwz: + case R600::EG_ExportSwz: return true; default: return false; @@ -213,12 +213,12 @@ std::vector UpdatedUndef = BaseRSI->UndefReg; for (DenseMap::iterator It = RSI->RegToChan.begin(), E = RSI->RegToChan.end(); It != E; ++It) { - unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + unsigned DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass); unsigned SubReg = (*It).first; unsigned Swizzle = (*It).second; unsigned Chan = getReassignedChan(RemapChan, Swizzle); - MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), + MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG), DstReg) .addReg(SrcVec) .addReg(SubReg) @@ -234,7 +234,7 @@ SrcVec = DstReg; } MachineInstr *NewMI = - BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec); + BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec); LLVM_DEBUG(dbgs() << " ->"; NewMI->dump();); LLVM_DEBUG(dbgs() << " Updating Swizzle:\n"); @@ -354,7 +354,7 @@ for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); MII != MIIE; ++MII) { MachineInstr &MI = *MII; - if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) { + if (MI.getOpcode() != R600::REG_SEQUENCE) { if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { unsigned Reg = MI.getOperand(1).getReg(); for 
(MachineRegisterInfo::def_instr_iterator Index: lib/Target/AMDGPU/R600Packetizer.cpp =================================================================== --- lib/Target/AMDGPU/R600Packetizer.cpp +++ lib/Target/AMDGPU/R600Packetizer.cpp @@ -84,39 +84,39 @@ LastDstChan = BISlot; if (TII->isPredicated(*BI)) continue; - int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); + int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write); if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) continue; - int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst); + int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst); if (DstIdx == -1) { continue; } unsigned Dst = BI->getOperand(DstIdx).getReg(); if (isTrans || TII->isTransOnly(*BI)) { - Result[Dst] = AMDGPU::PS; + Result[Dst] = R600::PS; continue; } - if (BI->getOpcode() == AMDGPU::DOT4_r600 || - BI->getOpcode() == AMDGPU::DOT4_eg) { - Result[Dst] = AMDGPU::PV_X; + if (BI->getOpcode() == R600::DOT4_r600 || + BI->getOpcode() == R600::DOT4_eg) { + Result[Dst] = R600::PV_X; continue; } - if (Dst == AMDGPU::OQAP) { + if (Dst == R600::OQAP) { continue; } unsigned PVReg = 0; switch (TRI.getHWRegChan(Dst)) { case 0: - PVReg = AMDGPU::PV_X; + PVReg = R600::PV_X; break; case 1: - PVReg = AMDGPU::PV_Y; + PVReg = R600::PV_Y; break; case 2: - PVReg = AMDGPU::PV_Z; + PVReg = R600::PV_Z; break; case 3: - PVReg = AMDGPU::PV_W; + PVReg = R600::PV_W; break; default: llvm_unreachable("Invalid Chan"); @@ -129,9 +129,9 @@ void substitutePV(MachineInstr &MI, const DenseMap &PVs) const { unsigned Ops[] = { - AMDGPU::OpName::src0, - AMDGPU::OpName::src1, - AMDGPU::OpName::src2 + R600::OpName::src0, + R600::OpName::src1, + R600::OpName::src2 }; for (unsigned i = 0; i < 3; i++) { int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]); @@ -171,7 +171,7 @@ return true; if (!TII->isALUInstr(MI.getOpcode())) return true; - if (MI.getOpcode() == AMDGPU::GROUP_BARRIER) + if (MI.getOpcode() == R600::GROUP_BARRIER) return true; // XXX: This can be removed once the packetizer properly handles all the // LDS instruction group restrictions. @@ -185,8 +185,8 @@ if (getSlot(*MII) == getSlot(*MIJ)) ConsideredInstUsesAlreadyWrittenVectorElement = true; // Does MII and MIJ share the same pred_sel ? 
- int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), - OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel); + int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel), + OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel); unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0, PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0; if (PredI != PredJ) @@ -220,7 +220,7 @@ } void setIsLastBit(MachineInstr *MI, unsigned Bit) const { - unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last); + unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last); MI->getOperand(LastOp).setImm(Bit); } @@ -301,11 +301,11 @@ for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { MachineInstr *MI = CurrentPacketMIs[i]; unsigned Op = TII->getOperandIdx(MI->getOpcode(), - AMDGPU::OpName::bank_swizzle); + R600::OpName::bank_swizzle); MI->getOperand(Op).setImm(BS[i]); } unsigned Op = - TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::bank_swizzle); + TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle); MI.getOperand(Op).setImm(BS.back()); if (!CurrentPacketMIs.empty()) setIsLastBit(CurrentPacketMIs.back(), 0); @@ -334,6 +334,7 @@ // DFA state table should not be empty. assert(Packetizer.getResourceTracker() && "Empty DFA table!"); + assert(Packetizer.getResourceTracker()->getInstrItins()); if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty()) return false; @@ -353,8 +354,8 @@ MachineBasicBlock::iterator End = MBB->end(); MachineBasicBlock::iterator MI = MBB->begin(); while (MI != End) { - if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF || - (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) { + if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF || + (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) { MachineBasicBlock::iterator DeleteMI = MI; ++MI; MBB->erase(DeleteMI); Index: lib/Target/AMDGPU/R600Processors.td =================================================================== --- lib/Target/AMDGPU/R600Processors.td +++ lib/Target/AMDGPU/R600Processors.td @@ -7,6 +7,62 @@ // //===----------------------------------------------------------------------===// +class SubtargetFeatureFetchLimit : + SubtargetFeature <"fetch"#Value, + "TexVTXClauseSize", + Value, + "Limit the maximum number of fetches in a clause to "#Value +>; + +def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", + "R600ALUInst", + "false", + "Older version of ALU instructions encoding" +>; + +def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">; +def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">; + +def FeatureVertexCache : SubtargetFeature<"HasVertexCache", + "HasVertexCache", + "true", + "Specify use of dedicated vertex cache" +>; + +def FeatureCaymanISA : SubtargetFeature<"caymanISA", + "CaymanISA", + "true", + "Use Cayman ISA" +>; + +def FeatureCFALUBug : SubtargetFeature<"cfalubug", + "CFALUBug", + "true", + "GPU has CF_ALU bug" +>; + +class R600SubtargetFeatureGeneration Implies> : + SubtargetFeatureGeneration ; + +def FeatureR600 : R600SubtargetFeatureGeneration<"R600", + [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0] +>; + +def FeatureR700 : R600SubtargetFeatureGeneration<"R700", + [FeatureFetchLimit16, FeatureLocalMemorySize0] +>; + +def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN", + [FeatureFetchLimit16, FeatureLocalMemorySize32768] +>; + +def FeatureNorthernIslands : 
R600SubtargetFeatureGeneration<"NORTHERN_ISLANDS", + [FeatureFetchLimit16, FeatureWavefrontSize64, + FeatureLocalMemorySize32768] +>; + + //===----------------------------------------------------------------------===// // Radeon HD 2000/3000 Series (R600). //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/R600RegisterInfo.h =================================================================== --- lib/Target/AMDGPU/R600RegisterInfo.h +++ lib/Target/AMDGPU/R600RegisterInfo.h @@ -15,13 +15,14 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H #define LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H -#include "AMDGPURegisterInfo.h" +#define GET_REGINFO_HEADER +#include "R600GenRegisterInfo.inc" namespace llvm { class AMDGPUSubtarget; -struct R600RegisterInfo final : public AMDGPURegisterInfo { +struct R600RegisterInfo final : public R600GenRegisterInfo { RegClassWeight RCW; R600RegisterInfo(); @@ -49,6 +50,8 @@ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; + + void reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const; }; } // End namespace llvm Index: lib/Target/AMDGPU/R600RegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/R600RegisterInfo.cpp +++ lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -21,34 +21,37 @@ using namespace llvm; -R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() { +R600RegisterInfo::R600RegisterInfo() : R600GenRegisterInfo(0) { RCW.RegWeight = 0; RCW.WeightLimit = 0; } +#define GET_REGINFO_TARGET_DESC +#include "R600GenRegisterInfo.inc" + BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const R600Subtarget &ST = MF.getSubtarget(); const R600InstrInfo *TII = ST.getInstrInfo(); - reserveRegisterTuples(Reserved, AMDGPU::ZERO); - reserveRegisterTuples(Reserved, AMDGPU::HALF); - reserveRegisterTuples(Reserved, AMDGPU::ONE); - reserveRegisterTuples(Reserved, AMDGPU::ONE_INT); - reserveRegisterTuples(Reserved, AMDGPU::NEG_HALF); - reserveRegisterTuples(Reserved, AMDGPU::NEG_ONE); - reserveRegisterTuples(Reserved, AMDGPU::PV_X); - reserveRegisterTuples(Reserved, AMDGPU::ALU_LITERAL_X); - reserveRegisterTuples(Reserved, AMDGPU::ALU_CONST); - reserveRegisterTuples(Reserved, AMDGPU::PREDICATE_BIT); - reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_OFF); - reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ZERO); - reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ONE); - reserveRegisterTuples(Reserved, AMDGPU::INDIRECT_BASE_ADDR); - - for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(), - E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) { + reserveRegisterTuples(Reserved, R600::ZERO); + reserveRegisterTuples(Reserved, R600::HALF); + reserveRegisterTuples(Reserved, R600::ONE); + reserveRegisterTuples(Reserved, R600::ONE_INT); + reserveRegisterTuples(Reserved, R600::NEG_HALF); + reserveRegisterTuples(Reserved, R600::NEG_ONE); + reserveRegisterTuples(Reserved, R600::PV_X); + reserveRegisterTuples(Reserved, R600::ALU_LITERAL_X); + reserveRegisterTuples(Reserved, R600::ALU_CONST); + reserveRegisterTuples(Reserved, R600::PREDICATE_BIT); + reserveRegisterTuples(Reserved, R600::PRED_SEL_OFF); + reserveRegisterTuples(Reserved, R600::PRED_SEL_ZERO); + reserveRegisterTuples(Reserved, R600::PRED_SEL_ONE); + reserveRegisterTuples(Reserved, R600::INDIRECT_BASE_ADDR); + + for (TargetRegisterClass::iterator I = 
R600::R600_AddrRegClass.begin(), + E = R600::R600_AddrRegClass.end(); I != E; ++I) { reserveRegisterTuples(Reserved, *I); } @@ -58,7 +61,7 @@ } // Dummy to not crash RegisterClassInfo. -static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister; +static const MCPhysReg CalleeSavedReg = R600::NoRegister; const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs( const MachineFunction *) const { @@ -66,7 +69,7 @@ } unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return AMDGPU::NoRegister; + return R600::NoRegister; } unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const { @@ -81,7 +84,7 @@ MVT VT) const { switch(VT.SimpleTy) { default: - case MVT::i32: return &AMDGPU::R600_TReg32RegClass; + case MVT::i32: return &R600::R600_TReg32RegClass; } } @@ -94,9 +97,9 @@ assert(!TargetRegisterInfo::isVirtualRegister(Reg)); switch (Reg) { - case AMDGPU::OQAP: - case AMDGPU::OQBP: - case AMDGPU::AR_X: + case R600::OQAP: + case R600::OQBP: + case R600::AR_X: return false; default: return true; @@ -109,3 +112,10 @@ RegScavenger *RS) const { llvm_unreachable("Subroutines not supported yet"); } + +void R600RegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { + MCRegAliasIterator R(Reg, this, true); + + for (; R.isValid(); ++R) + Reserved.set(*R); +} Index: lib/Target/AMDGPU/R600RegisterInfo.td =================================================================== --- lib/Target/AMDGPU/R600RegisterInfo.td +++ lib/Target/AMDGPU/R600RegisterInfo.td @@ -245,7 +245,7 @@ (add V0123_W, V0123_Z, V0123_Y, V0123_X) >; -def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, +def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32, i64, f64], 64, (add (sequence "T%u_XY", 0, 63))>; def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, Index: lib/Target/AMDGPU/R700Instructions.td =================================================================== --- lib/Target/AMDGPU/R700Instructions.td +++ lib/Target/AMDGPU/R700Instructions.td @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">; +def isR700 : Predicate<"Subtarget->getGeneration() == R600Subtarget::R700">; let Predicates = [isR700] in { def SIN_r700 : SIN_Common<0x6E>; Index: lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- lib/Target/AMDGPU/SIFoldOperands.cpp +++ lib/Target/AMDGPU/SIFoldOperands.cpp @@ -76,7 +76,7 @@ MachineRegisterInfo *MRI; const SIInstrInfo *TII; const SIRegisterInfo *TRI; - const SISubtarget *ST; + const AMDGPUSubtarget *ST; void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, @@ -972,7 +972,7 @@ return false; MRI = &MF.getRegInfo(); - ST = &MF.getSubtarget(); + ST = &MF.getSubtarget(); TII = ST->getInstrInfo(); TRI = &TII->getRegisterInfo(); Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -22,6 +22,9 @@ namespace llvm { class SITargetLowering final : public AMDGPUTargetLowering { +private: + const SISubtarget *Subtarget; + SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, uint64_t Offset) const; SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- 
lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -113,7 +113,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, const SISubtarget &STI) - : AMDGPUTargetLowering(TM, STI) { + : AMDGPUTargetLowering(TM, STI), + Subtarget(&STI) { addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); @@ -145,7 +146,7 @@ addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass); } - computeRegisterProperties(STI.getRegisterInfo()); + computeRegisterProperties(Subtarget->getRegisterInfo()); // We need to custom lower vector stores from local memory setOperationAction(ISD::LOAD, MVT::v2i32, Custom); @@ -312,7 +313,7 @@ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand); - if (getSubtarget()->hasFlatAddressSpace()) { + if (Subtarget->hasFlatAddressSpace()) { setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); } @@ -325,6 +326,44 @@ setOperationAction(ISD::TRAP, MVT::Other, Custom); setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom); + if (Subtarget->has16BitInsts()) { + setOperationAction(ISD::FLOG, MVT::f16, Custom); + setOperationAction(ISD::FLOG10, MVT::f16, Custom); + } + + // v_mad_f32 does not support denormals according to some sources. + if (!Subtarget->hasFP32Denormals()) + setOperationAction(ISD::FMAD, MVT::f32, Legal); + + if (!Subtarget->hasBFI()) { + // fcopysign can be done in a single instruction with BFI. + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + } + + if (!Subtarget->hasBCNT(32)) + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + + if (!Subtarget->hasBCNT(64)) + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + + if (Subtarget->hasFFBH()) + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); + + if (Subtarget->hasFFBL()) + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); + + // We only really have 32-bit BFE instructions (and 16-bit on VI). + // + // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any + // effort to match them now. We want this to be false for i64 cases when the + // extraction isn't restricted to the upper or lower half. Ideally we would + // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that + // span the midpoint are probably relatively rare, so don't worry about them + // for now. + if (Subtarget->hasBFE()) + setHasExtractBitsInsn(true); + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -332,6 +371,11 @@ setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); + } else { + setOperationAction(ISD::FCEIL, MVT::f64, Custom); + setOperationAction(ISD::FTRUNC, MVT::f64, Custom); + setOperationAction(ISD::FRINT, MVT::f64, Custom); + setOperationAction(ISD::FFLOOR, MVT::f64, Custom); } setOperationAction(ISD::FFLOOR, MVT::f64, Legal); @@ -575,10 +619,15 @@ setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX); setSchedulingPreference(Sched::RegPressure); + + // SI at least has hardware support for floating point exceptions, but no way + // of using or handling them is implemented. 
They are also optional in OpenCL + // (Section 7.3) + setHasFloatingPointExceptions(Subtarget->hasFPExceptions()); } const SISubtarget *SITargetLowering::getSubtarget() const { - return static_cast(Subtarget); + return Subtarget; } //===----------------------------------------------------------------------===// @@ -1944,8 +1993,7 @@ // FIXME: Does sret work properly? if (!Info->isEntryFunction()) { - const SIRegisterInfo *TRI - = static_cast(Subtarget)->getRegisterInfo(); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { @@ -2047,8 +2095,7 @@ SelectionDAG &DAG = CLI.DAG; const SDLoc &DL = CLI.DL; - const SISubtarget *ST = getSubtarget(); - const SIRegisterInfo *TRI = ST->getRegisterInfo(); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); auto &ArgUsageInfo = DAG.getPass()->getAnalysis(); @@ -2486,7 +2533,7 @@ // Add a register mask operand representing the call-preserved registers. - const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); + auto *TRI = static_cast(Subtarget->getRegisterInfo()); const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -7695,8 +7742,7 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *Info = MF.getInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); - const SISubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); if (Info->isEntryFunction()) { // Callable functions have fixed registers used for stack access. Index: lib/Target/AMDGPU/SIInsertWaitcnts.cpp =================================================================== --- lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -930,8 +930,7 @@ // All waits must be resolved at call return. // NOTE: this could be improved with knowledge of all call sites or // with knowledge of the called routines. - if (MI.getOpcode() == AMDGPU::RETURN || - MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || + if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || MI.getOpcode() == AMDGPU::S_SETPC_B64_return) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { @@ -1127,7 +1126,7 @@ // TODO: Remove this work-around, enable the assert for Bug 457939 // after fixing the scheduler. Also, the Shader Compiler code is // independent of target. - if (readsVCCZ(MI) && ST->getGeneration() <= SISubtarget::SEA_ISLANDS) { + if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) { if (ScoreBrackets->getScoreLB(LGKM_CNT) < ScoreBrackets->getScoreUB(LGKM_CNT) && ScoreBrackets->hasPendingSMEM()) { @@ -1712,7 +1711,7 @@ if (ScoreBrackets->getScoreLB(LGKM_CNT) < ScoreBrackets->getScoreUB(LGKM_CNT) && ScoreBrackets->hasPendingSMEM()) { - if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) + if (ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) VCCZBugWorkAround = true; } } Index: lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/SIInstrFormats.td +++ lib/Target/AMDGPU/SIInstrFormats.td @@ -21,7 +21,7 @@ class InstSI pattern = []> : - AMDGPUInst, PredicateControl { + AMDGPUInst, GCNPredicateControl { let SubtargetPredicate = isGCN; // Low bits - basic encoding information. 
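The SITargetLowering constructor hunks above (SIISelLowering.cpp) repeat one pattern: a subtarget feature bit picks how a generic operation is handled. Below is a small standalone sketch of that selection under assumed feature flags; the Action enum, Features struct, and helper names are invented for illustration and are not the real TargetLowering or subtarget interfaces.

```cpp
// Standalone sketch (illustration only): feature-gated selection of an
// operation's handling, in the spirit of the setOperationAction calls above.
// In the real constructor the "keep default" cases simply omit the call.
#include <cstdio>

enum class Action { KeepDefault, Expand };

struct Features {          // assumed subtarget feature bits, illustration only
  bool HasBFI = true;      // bitfield insert available (fcopysign in one op)
  bool HasBCNT32 = true;   // 32-bit population count available
};

// Without BFI, fcopysign has to be expanded into bit arithmetic; with it the
// default handling is kept (a BFI pattern matches it later).
static Action actionForFCopySignF32(const Features &F) {
  return F.HasBFI ? Action::KeepDefault : Action::Expand;
}

// Without a hardware popcount, i32 ctpop is likewise expanded.
static Action actionForCtpopI32(const Features &F) {
  return F.HasBCNT32 ? Action::KeepDefault : Action::Expand;
}

int main() {
  Features Full;            // example subtarget with both features
  Features Minimal;         // example subtarget with neither feature
  Minimal.HasBFI = false;
  Minimal.HasBCNT32 = false;
  std::printf("fcopysign f32: %d vs %d\n", (int)actionForFCopySignF32(Full),
              (int)actionForFCopySignF32(Minimal));
  std::printf("ctpop i32    : %d vs %d\n", (int)actionForCtpopI32(Full),
              (int)actionForCtpopI32(Minimal));
}
```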
Index: lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.h +++ lib/Target/AMDGPU/SIInstrInfo.h @@ -31,6 +31,9 @@ #include #include +#define GET_INSTRINFO_HEADER +#include "AMDGPUGenInstrInfo.inc" + namespace llvm { class APInt; @@ -39,7 +42,7 @@ class SISubtarget; class TargetRegisterClass; -class SIInstrInfo final : public AMDGPUInstrInfo { +class SIInstrInfo final : public AMDGPUGenInstrInfo { private: const SIRegisterInfo RI; const SISubtarget &ST; @@ -163,7 +166,10 @@ bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1, MachineInstr &SecondLdSt, unsigned BaseReg2, - unsigned NumLoads) const final; + unsigned NumLoads) const override; + + bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, + int64_t Offset1, unsigned NumLoads) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, @@ -879,6 +885,12 @@ static bool isLegalMUBUFImmOffset(unsigned Imm) { return isUInt<12>(Imm); } + + /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. /// Return -1 if the target-specific opcode for the pseudo instruction does /// not exist. If Opcode is not a pseudo instruction, this is identity. + int pseudoToMCOpcode(int Opcode) const; + }; namespace AMDGPU { Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -14,6 +14,7 @@ #include "SIInstrInfo.h" #include "AMDGPU.h" +#include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" #include "SIDefines.h" @@ -63,6 +64,20 @@ using namespace llvm; +#define GET_INSTRINFO_CTOR_DTOR +#include "AMDGPUGenInstrInfo.inc" + +namespace llvm { +namespace AMDGPU { +#define GET_RSRCINTRINSIC_IMPL +#include "AMDGPUGenSearchableTables.inc" + +#define GET_D16IMAGEDIMINTRINSIC_IMPL +#include "AMDGPUGenSearchableTables.inc" +} +} + + // Must be at least 4 to be able to branch over minimum unconditional branch // code. This is only for making it possible to write reasonably small tests for // long branches. @@ -71,7 +86,8 @@ cl::desc("Restrict range of branch instructions (DEBUG)")); SIInstrInfo::SIInstrInfo(const SISubtarget &ST) - : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {} + : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), + RI(ST), ST(ST) {} //===----------------------------------------------------------------------===// // TargetInstrInfo callbacks @@ -438,6 +454,28 @@ return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold; } +// FIXME: This behaves strangely. If, for example, you have 32 load + stores, // the first 16 loads will be interleaved with the stores, and the next 16 will // be clustered as expected. It should really split into two batches of 16 stores. // // Loads are clustered until this returns false, rather than trying to schedule // groups of stores. This also means we have to deal with saying different // address space loads should be clustered, and ones which might cause bank // conflicts. // // This might be deprecated so it might not be worth that much effort to fix.
+bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, + int64_t Offset0, int64_t Offset1, + unsigned NumLoads) const { + assert(Offset1 > Offset0 && + "Second offset should be larger than first offset!"); + // If we have less than 16 loads in a row, and the offsets are within 64 + // bytes, then schedule together. + + // A cacheline is 64 bytes (for global memory). + return (NumLoads <= 16 && (Offset1 - Offset0) < 64); +} + static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, @@ -998,7 +1036,7 @@ unsigned FrameOffset, unsigned Size) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); - const SISubtarget &ST = MF->getSubtarget(); + const AMDGPUSubtarget &ST = MF->getSubtarget(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize(); unsigned WavefrontSize = ST.getWavefrontSize(); @@ -1134,7 +1172,7 @@ MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MBB.findDebugLoc(MI); switch (MI.getOpcode()) { - default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); + default: return TargetInstrInfo::expandPostRAPseudo(MI); case AMDGPU::S_MOV_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation. @@ -1900,16 +1938,16 @@ switch(Kind) { case PseudoSourceValue::Stack: case PseudoSourceValue::FixedStack: - return AMDGPUASI.PRIVATE_ADDRESS; + return ST.getAMDGPUAS().PRIVATE_ADDRESS; case PseudoSourceValue::ConstantPool: case PseudoSourceValue::GOT: case PseudoSourceValue::JumpTable: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: case PseudoSourceValue::TargetCustom: - return AMDGPUASI.CONSTANT_ADDRESS; + return ST.getAMDGPUAS().CONSTANT_ADDRESS; } - return AMDGPUASI.FLAT_ADDRESS; + return ST.getAMDGPUAS().FLAT_ADDRESS; } static void removeModOperands(MachineInstr &MI) { @@ -4649,7 +4687,7 @@ return AMDGPU::NoRegister; assert(!MI.memoperands_empty() && - (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS); + (*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS); FrameIndex = Addr->getIndex(); return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); @@ -4768,7 +4806,7 @@ return true; for (const MachineMemOperand *MMO : MI.memoperands()) { - if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS) + if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS) return true; } return false; @@ -4948,3 +4986,56 @@ const auto RCID = MI.getDesc().OpInfo[Idx].RegClass; return RCID == AMDGPU::SReg_128RegClassID; } + +// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td +enum SIEncodingFamily { + SI = 0, + VI = 1, + SDWA = 2, + SDWA9 = 3, + GFX80 = 4, + GFX9 = 5 +}; + +static SIEncodingFamily subtargetEncodingFamily(const SISubtarget &ST) { + switch (ST.getGeneration()) { + case SISubtarget::SOUTHERN_ISLANDS: + case SISubtarget::SEA_ISLANDS: + return SIEncodingFamily::SI; + case SISubtarget::VOLCANIC_ISLANDS: + case SISubtarget::GFX9: + return SIEncodingFamily::VI; + } + llvm_unreachable("Unknown subtarget generation!"); +} + +int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { + SIEncodingFamily Gen = subtargetEncodingFamily(ST); + + if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && + ST.getGeneration() >= SISubtarget::GFX9) + Gen = SIEncodingFamily::GFX9; + + if (get(Opcode).TSFlags & SIInstrFlags::SDWA) + Gen = ST.getGeneration() == 
SISubtarget::GFX9 ? SIEncodingFamily::SDWA9 + : SIEncodingFamily::SDWA; + // Adjust the encoding family to GFX80 for D16 buffer instructions when the + // subtarget has the UnpackedD16VMem feature. + // TODO: remove this when we discard GFX80 encoding. + if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16) + && !(get(Opcode).TSFlags & SIInstrFlags::MIMG)) + Gen = SIEncodingFamily::GFX80; + + int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); + + // -1 means that Opcode is already a native instruction. + if (MCOp == -1) + return Opcode; + + // (uint16_t)-1 means that Opcode is a pseudo instruction that has + // no encoding in the given subtarget generation. + if (MCOp == (uint16_t)-1) + return -1; + + return MCOp; +} Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -17,6 +17,11 @@ def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">; +class GCNPredicateControl : PredicateControl { + Predicate SIAssemblerPredicate = isSICI; + Predicate VIAssemblerPredicate = isVI; +} + // Except for the NONE field, this must be kept in sync with the // SIEncodingFamily enum in AMDGPUInstrInfo.cpp def SIEncodingFamily { Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -11,11 +11,10 @@ // that are not yet supported remain commented out. //===----------------------------------------------------------------------===// -class GCNPat : AMDGPUPat { +class GCNPat : Pat, GCNPredicateControl { let SubtargetPredicate = isGCN; } - include "VOPInstructions.td" include "SOPInstructions.td" include "SMInstructions.td" Index: lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.h +++ lib/Target/AMDGPU/SIRegisterInfo.h @@ -21,6 +21,7 @@ namespace llvm { +class AMDGPUSubtarget; class LiveIntervals; class MachineRegisterInfo; class SISubtarget; Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1232,8 +1232,6 @@ &AMDGPU::VReg_512RegClass, &AMDGPU::SReg_512RegClass, &AMDGPU::SCC_CLASSRegClass, - &AMDGPU::R600_Reg32RegClass, - &AMDGPU::R600_PredicateRegClass, &AMDGPU::Pseudo_SReg_32RegClass, &AMDGPU::Pseudo_SReg_128RegClass, }; Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -93,7 +93,7 @@ /// \returns Maximum number of waves per execution unit for given subtarget \p /// Features without any kind of limitation. -unsigned getMaxWavesPerEU(const FeatureBitset &Features); +unsigned getMaxWavesPerEU(); /// \returns Maximum number of waves per execution unit for given subtarget \p /// Features and limited by given \p FlatWorkGroupSize.
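The tail of pseudoToMCOpcode above distinguishes two lookup sentinels that are easy to confuse: plain -1 (the opcode is already a native instruction) and (uint16_t)-1 (a pseudo with no encoding on this generation). A standalone sketch of just that handling, where getMCOpcodeForGen is a made-up stand-in for the TableGen-generated AMDGPU::getMCOpcode table:

```cpp
// Standalone sketch (illustration only) of the sentinel handling in
// pseudoToMCOpcode above.
#include <cstdint>
#include <cstdio>

// Made-up stand-in for the generated AMDGPU::getMCOpcode lookup.
static int getMCOpcodeForGen(int Opcode, int Gen);

// Mirrors the interpretation in the patch:
//   -1            -> Opcode is already a native instruction, return it as-is
//   (uint16_t)-1  -> pseudo with no encoding for this generation, return -1
//   anything else -> the resolved MC opcode
static int resolvePseudo(int Opcode, int Gen) {
  int MCOp = getMCOpcodeForGen(Opcode, Gen);
  if (MCOp == -1)
    return Opcode;
  if (MCOp == (uint16_t)-1)
    return -1;
  return MCOp;
}

// Toy lookup table, for demonstration only.
static int getMCOpcodeForGen(int Opcode, int Gen) {
  if (Opcode == 100)                       // pseudo: encoded only for Gen 0
    return Gen == 0 ? 0x1234 : (uint16_t)-1;
  return -1;                               // everything else is already native
}

int main() {
  std::printf("%d\n", resolvePseudo(100, 0)); // 4660: resolved MC opcode
  std::printf("%d\n", resolvePseudo(100, 1)); // -1: no encoding on this gen
  std::printf("%d\n", resolvePseudo(7, 0));   // 7: already a native opcode
}
```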
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -228,7 +228,7 @@ if (Features.test(FeatureGFX9)) return {9, 0, 0}; - if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) + if (Features.test(FeatureSouthernIslands)) return {0, 0, 0}; return {7, 0, 0}; } @@ -286,7 +286,7 @@ } unsigned getMaxWavesPerCU(const FeatureBitset &Features) { - return getMaxWavesPerEU(Features) * getEUsPerCU(Features); + return getMaxWavesPerEU() * getEUsPerCU(Features); } unsigned getMaxWavesPerCU(const FeatureBitset &Features, @@ -298,9 +298,7 @@ return 1; } -unsigned getMaxWavesPerEU(const FeatureBitset &Features) { - if (!Features.test(FeatureGCN)) - return 8; +unsigned getMaxWavesPerEU() { // FIXME: Need to take scratch memory into account. return 10; } @@ -356,7 +354,7 @@ unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { assert(WavesPerEU != 0); - if (WavesPerEU >= getMaxWavesPerEU(Features)) + if (WavesPerEU >= getMaxWavesPerEU()) return 0; unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1); @@ -400,7 +398,7 @@ unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { assert(WavesPerEU != 0); - if (WavesPerEU >= getMaxWavesPerEU(Features)) + if (WavesPerEU >= getMaxWavesPerEU()) return 0; unsigned MinNumVGPRs = alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1), @@ -724,6 +722,8 @@ case node: return isGFX9(STI) ? node##_gfx9 : node##_vi; unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { + if (STI.getTargetTriple().getArch() == Triple::r600) + return Reg; MAP_REG2REG }
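The getMinNumSGPRs/getMinNumVGPRs hunks above keep their arithmetic and only swap the feature-dependent wave limit for the new constant getMaxWavesPerEU(). As a rough standalone illustration of that arithmetic (the register total below is an example value, not an authoritative hardware number):

```cpp
// Standalone sketch (illustration only) of the occupancy bound used above.
#include <cstdio>

constexpr unsigned MaxWavesPerEU = 10;   // now a plain constant, as in the patch

// TotalRegs / (WavesPerEU + 1) is the largest per-wave register count at which
// one *additional* wave would still fit, so it serves as the lower bound.
static unsigned minRegsFor(unsigned WavesPerEU, unsigned TotalRegs) {
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;                              // no lower bound is needed
  return TotalRegs / (WavesPerEU + 1);
}

int main() {
  const unsigned TotalVGPRs = 256;         // example register-file size only
  for (unsigned W = 1; W <= MaxWavesPerEU; ++W)
    std::printf("waves=%u -> lower bound %u regs\n", W, minRegsFor(W, TotalVGPRs));
}
```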