Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1002,7 +1002,7 @@
       (OffsetBits == 8 && !isUInt<8>(Offset)))
     return false;

-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
+  if (Subtarget->hasUsableDSOffset() ||
       Subtarget->unsafeDSOffsetFoldingEnabled())
     return true;
@@ -1263,7 +1263,7 @@
   SDValue Ptr, Offen, Idxen, Addr64;

   // addr64 bit was removed for volcanic islands.
-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (!Subtarget->hasAddr64())
     return false;

   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
@@ -2234,9 +2234,8 @@
 }

 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
-  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    return false;
-  }
+  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
+
   const SIRegisterInfo *SIRI =
       static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
   const SIInstrInfo * SII =
Index: lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -111,7 +111,7 @@
     // integer types.
     if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
          PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
-        ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
+        !ST.hasUsableDSOffset())
       continue;

     // FIXME: We can replace this with equivalent alias.scope/noalias
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -455,6 +455,12 @@
     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
   }

+  // Return true if the target only has the reverse operand versions of VALU
+  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
+  bool hasOnlyRevVALUShifts() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
   bool hasBFE() const {
     return true;
   }
@@ -511,10 +517,48 @@
     return EnableHugePrivateBuffer;
   }

+  /// True if the offset field of DS instructions works as expected. On SI, the
+  /// offset uses a 16-bit adder and does not always wrap properly.
+  bool hasUsableDSOffset() const {
+    return getGeneration() >= SEA_ISLANDS;
+  }
+
   bool unsafeDSOffsetFoldingEnabled() const {
     return EnableUnsafeDSOffsetFolding;
   }

+  /// Condition output from div_scale is usable.
+  bool hasUsableDivScaleConditionOutput() const {
+    return getGeneration() != SOUTHERN_ISLANDS;
+  }
+
+  /// Extra wait hazard is needed in some cases before
+  /// s_cbranch_vccnz/s_cbranch_vccz.
+  bool hasReadVCCZBug() const {
+    return getGeneration() <= SEA_ISLANDS;
+  }
+
+  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
+  /// was written by a VALU instruction.
+  bool hasSMRDReadVALUDefHazard() const {
+    return getGeneration() == SOUTHERN_ISLANDS;
+  }
+
+  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
+  /// SGPR was written by a VALU instruction.
+  bool hasVMEMReadSGPRVALUDefHazard() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
+  bool hasRFEHazards() const {
+    return getGeneration() < VOLCANIC_ISLANDS;
+  }
+
+  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
+  unsigned getSetRegWaitStates() const {
+    return getGeneration() <= SEA_ISLANDS ? 1 : 2;
+  }
+
   bool dumpCode() const {
     return DumpCode;
   }
@@ -546,6 +590,11 @@
     return CIInsts && EnableDS128;
   }

+  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
+  bool haveRoundOpsF64() const {
+    return CIInsts;
+  }
+
   /// \returns If MUBUF instructions always perform range checking, even for
   /// buffer resources used for private memory access.
   bool privateMemoryResourceIsRangeChecked() const {
@@ -615,6 +664,10 @@
     return hasD16LoadStore() && !isSRAMECCEnabled();
   }

+  bool hasD16Images() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
   /// Return if most LDS instructions have an m0 use that requires m0 to be
   /// initialized.
   bool ldsRequiresM0Init() const {
Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -401,7 +401,7 @@
     if (SLT == MVT::f64) {
       int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();

       // Add cost of workaround.
-      if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      if (!ST->hasUsableDivScaleConditionOutput())
         Cost += 3 * getFullRateInstrCost();

       return LT.first * Cost * NElts;
Index: lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -459,7 +459,7 @@
   WaitStatesNeeded = checkSoftClauseHazards(SMRD);

   // This SMRD hazard only affects SI.
-  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
+  if (!ST.hasSMRDReadVALUDefHazard())
     return WaitStatesNeeded;

   // A read of an SGPR by SMRD instruction requires 4 wait states when the
@@ -498,7 +498,7 @@
 }

 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
-  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (!ST.hasVMEMReadSGPRVALUDefHazard())
     return 0;

   int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
@@ -578,8 +578,7 @@
   const SIInstrInfo *TII = ST.getInstrInfo();
   unsigned HWReg = getHWReg(TII, *SetRegInstr);

-  const int SetRegWaitStates =
-      ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
+  const int SetRegWaitStates = ST.getSetRegWaitStates();
   auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
     return HWReg == getHWReg(TII, *MI);
   };
@@ -725,7 +724,7 @@
 }

 int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
-  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (!ST.hasRFEHazards())
     return 0;

   const SIInstrInfo *TII = ST.getInstrInfo();
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -424,7 +424,7 @@
   setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);

-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+  if (Subtarget->haveRoundOpsF64()) {
     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
     setOperationAction(ISD::FCEIL, MVT::f64, Legal);
     setOperationAction(ISD::FRINT, MVT::f64, Legal);
@@ -2829,7 +2829,7 @@
   }

-  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+  if (!Subtarget->hasFlatAddressSpace() &&
       Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
     report_fatal_error(Twine("invalid register \"" + StringRef(RegName) +
                              "\" for subtarget."));
@@ -4831,8 +4831,7 @@
   MVT StoreVT = VData.getSimpleValueType();
   if (StoreVT.getScalarType() == MVT::f16) {
-    if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
-        !BaseOpcode->HasD16)
+    if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
       return Op; // D16 is unsupported for this instruction

     IsD16 = true;
@@ -4845,8 +4844,7 @@
   // and whether packing is supported.
   MVT LoadVT = ResultTypes[0].getSimpleVT();
   if (LoadVT.getScalarType() == MVT::f16) {
-    if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
-        !BaseOpcode->HasD16)
+    if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
       return Op; // D16 is unsupported for this instruction

     IsD16 = true;
@@ -7043,7 +7041,7 @@
   SDValue Scale;

-  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+  if (!Subtarget->hasUsableDivScaleConditionOutput()) {
     // Workaround a hardware bug on SI where the condition output from div_scale
     // is not usable.
@@ -7157,7 +7155,7 @@
   // out-of-bounds even if base + offsets is in bounds. Split vectorized
   // stores here to avoid emitting ds_write2_b32. We may re-combine the
   // store later in the SILoadStoreOptimizer.
-  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+  if (!Subtarget->hasUsableDSOffset() &&
       NumElements == 2 && VT.getStoreSize() == 8 &&
       Store->getAlignment() < 8) {
     return SplitVectorStore(Op, DAG);
Index: lib/Target/AMDGPU/SIInsertWaitcnts.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1004,7 +1004,7 @@
     // TODO: Remove this work-around, enable the assert for Bug 457939
     //       after fixing the scheduler. Also, the Shader Compiler code is
     //       independent of target.
-    if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
+    if (readsVCCZ(MI) && ST->hasReadVCCZBug()) {
       if (ScoreBrackets.getScoreLB(LGKM_CNT) <
               ScoreBrackets.getScoreUB(LGKM_CNT) &&
           ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4293,37 +4293,37 @@
       continue;

     case AMDGPU::S_LSHL_B32:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_ASHR_I32:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_LSHR_B32:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_LSHL_B64:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_LSHLREV_B64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_ASHR_I64:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_ASHRREV_I64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_LSHR_B64:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_LSHRREV_B64;
         swapOperands(Inst);
       }
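The pattern is uniform across these hunks: an inline getGeneration() comparison
at each call site becomes a named AMDGPUSubtarget predicate that records which
hardware quirk the comparison encodes. As a rough illustration of the idea,
here is a minimal, self-contained sketch; Generation, Subtarget, and
canFoldDSOffset below are simplified stand-ins, not the real AMDGPUSubtarget
interface:

#include <cassert>

// Illustrative stand-ins; the real declarations live in AMDGPUSubtarget.h.
enum Generation { SOUTHERN_ISLANDS, SEA_ISLANDS, VOLCANIC_ISLANDS };

class Subtarget {
  Generation Gen;

public:
  explicit Subtarget(Generation G) : Gen(G) {}

  // After the patch, the generation comparison lives in one place, named for
  // the feature it gates: on SI the DS offset field uses a 16-bit adder that
  // does not always wrap properly, so the offset is only usable from CI on.
  bool hasUsableDSOffset() const { return Gen >= SEA_ISLANDS; }
};

// A call site now reads as a feature query instead of a bare generation test.
bool canFoldDSOffset(const Subtarget &ST, bool UnsafeFoldingEnabled) {
  return ST.hasUsableDSOffset() || UnsafeFoldingEnabled;
}

int main() {
  assert(!canFoldDSOffset(Subtarget(SOUTHERN_ISLANDS), false)); // SI: buggy adder
  assert(canFoldDSOffset(Subtarget(SEA_ISLANDS), false));       // CI+: usable
  assert(canFoldDSOffset(Subtarget(SOUTHERN_ISLANDS), true));   // explicit opt-in
  return 0;
}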