This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Add simple machine level computeKnownBits
AbandonedPublic

Authored by arsenm on Feb 28 2017, 10:59 AM.

Download Raw Diff

Details

Reviewers

Summary

The DAG makes it difficult/impossible to make the right
decisions when there are no direct equivalents between SALU
and VALU instructions.

For example, we can use the v_cvt_pk_u16_u32 instruction to
pack a v2i16 if the known high bits are 0. However there is no
scalar equivalent, so pattern matching this in the DAG
introduces divergences from what would be a simple bit pattern.
With known bits, we can pattern match this after it is known
to really be a vector op.

This also may be useful for the SDWA pass.

Diff Detail

Event Timeline

arsenm created this revision. · Feb 28 2017, 10:59 AM

Herald added subscribers: tpr, dstuttard, tony-tye and 4 others. · View Herald Transcript · Feb 28 2017, 10:59 AM

Fix shift mask computation

We'll need something like this for GlobalISel anyway.

This revision is now accepted and ready to land. · Mar 1 2017, 8:56 AM

t-tye added a subscriber: t-tye. · Mar 22 2017, 6:40 PM

tony-tye removed a subscriber: tony-tye. · Mar 22 2017, 6:47 PM

arsenm mentioned this in D55241: AMDGPU: Should always start from the first register in VGPR indexing. · Dec 3 2018, 4:44 PM

arsenm abandoned this revision. · Apr 4 2020, 6:28 PM

Herald added subscribers: kerbowa, jvesely. · View Herald Transcript · Apr 4 2020, 6:28 PM

Revision Contents

Path

Size

lib/

Target/

AMDGPU/

SIInstrInfo.h

4 lines

SIInstrInfo.cpp

154 lines

Diff 90082

lib/Target/AMDGPU/SIInstrInfo.h

Show First 20 Lines • Show All 745 Lines • ▼ Show 20 Lines	public:
ScheduleHazardRecognizer *		ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,		CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const override;		const ScheduleDAG *DAG) const override;

ScheduleHazardRecognizer *		ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;		CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

bool isBasicBlockPrologue(const MachineInstr &MI) const override;		bool isBasicBlockPrologue(const MachineInstr &MI) const override;

		/// Determine which bits of \p Op are known to be zero or known to be one.
		///
		/// The implementation recognizes immediates (including registers whose
		/// unique definition is a move-immediate), COPY without a source
		/// subregister, 32-bit AND, and 32-bit logical shifts by a constant
		/// amount. It recurses through defining instructions up to a fixed depth.
		///
		/// \param MRI        Register info used to find defining instructions.
		/// \param Op         Operand whose bits are queried.
		/// \param KnownZero  [out] Mask of bits known to be 0.
		/// \param KnownOne   [out] Mask of bits known to be 1.
		/// \param Depth      Current recursion depth; external callers use the
		///                   default of 0.
		///
		/// Initialize \p KnownZero and \p KnownOne to 0 before calling; a bit that
		/// cannot be determined is then reported as clear in both masks.
		void computeKnownBits(const MachineRegisterInfo &MRI, const MachineOperand &Op,
		uint64_t &KnownZero, uint64_t &KnownOne,
		unsigned Depth = 0) const;
};		};

namespace AMDGPU {		namespace AMDGPU {
LLVM_READONLY		LLVM_READONLY
int getVOPe64(uint16_t Opcode);		int getVOPe64(uint16_t Opcode);

LLVM_READONLY		LLVM_READONLY
int getVOPe32(uint16_t Opcode);		int getVOPe32(uint16_t Opcode);
▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIInstrInfo.cpp

	Show First 20 Lines • Show All 3,878 Lines • ▼ Show 20 Lines
	SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {			SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
	return new GCNHazardRecognizer(MF);			return new GCNHazardRecognizer(MF);
	}			}

	bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {			bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
	return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&			return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
	MI.modifiesRegister(AMDGPU::EXEC, &RI);			MI.modifiesRegister(AMDGPU::EXEC, &RI);
	}			}

				static bool isImmOrMaterializedImm(const MachineRegisterInfo &MRI,
				const MachineOperand &Op,
				uint64_t &Imm) {
				if (Op.isImm()) {
				Imm = Op.getImm();
				return true;
				}

				if (Op.isReg()) {
				const MachineInstr *Def = MRI.getUniqueVRegDef(Op.getReg());
				if (!Def \|\| !Def->isMoveImmediate())
				return false;

				const MachineOperand &Src = Def->getOperand(1);
				if (Src.isImm()) {
				Imm = Src.getImm();
				return true;
				}

				return false;
				}

				return false;
				}

				/// Compute known bits for the result of a 32-bit shift by a constant amount.
				///
				/// \param TII        Instruction info used to recurse into the shifted value.
				/// \param MRI        Register info used to find defining instructions.
				/// \param Opcode     Opcode of the shift instruction being analyzed.
				/// \param LHS        Operand holding the value being shifted.
				/// \param RHS        Operand holding the shift amount.
				/// \param KnownZero  [out] Mask of result bits known to be 0.
				/// \param KnownOne   [out] Mask of result bits known to be 1.
				/// \param Depth      Recursion depth of the caller; the shifted value is
				///                   analyzed at Depth + 1.
				///
				/// Only the low 32 bits of the output masks are populated. Both masks are 0
				/// ("nothing known") when the shift amount is not a constant or when
				/// \p Opcode is not a logical shift handled below (e.g. arithmetic shifts).
				static void computeKnownBitsShift(const SIInstrInfo *TII,
				                                  const MachineRegisterInfo &MRI,
				                                  unsigned Opcode,
				                                  const MachineOperand &LHS,
				                                  const MachineOperand &RHS,
				                                  uint64_t &KnownZero,
				                                  uint64_t &KnownOne,
				                                  unsigned Depth) {
				  // Start from "nothing known" so every exit path leaves defined outputs.
				  KnownZero = 0;
				  KnownOne = 0;

				  uint64_t ShiftAmt;
				  if (!isImmOrMaterializedImm(MRI, RHS, ShiftAmt))
				    return;

				  // 32-bit hardware shifts only consume the low 5 bits of the amount. This
				  // also keeps the C++ shifts below well defined for any immediate.
				  ShiftAmt &= 31;

				  const uint64_t Mask32 = UINT64_C(0xffffffff);

				  switch (Opcode) {
				  case AMDGPU::S_LSHL_B32:
				  case AMDGPU::V_LSHLREV_B32_e64:
				  case AMDGPU::V_LSHLREV_B32_e32:
				  case AMDGPU::V_LSHL_B32_e64:
				  case AMDGPU::V_LSHL_B32_e32: {
				    TII->computeKnownBits(MRI, LHS, KnownZero, KnownOne, Depth + 1);

				    // Bits shifted past bit 31 are discarded by the 32-bit operation.
				    KnownZero = (KnownZero << ShiftAmt) & Mask32;
				    KnownOne = (KnownOne << ShiftAmt) & Mask32;

				    // The low ShiftAmt bits are filled with zeros.
				    KnownZero |= (UINT64_C(1) << ShiftAmt) - 1;
				    return;
				  }
				  case AMDGPU::S_LSHR_B32:
				  case AMDGPU::V_LSHRREV_B32_e64:
				  case AMDGPU::V_LSHRREV_B32_e32:
				  case AMDGPU::V_LSHR_B32_e64:
				  case AMDGPU::V_LSHR_B32_e32: {
				    TII->computeKnownBits(MRI, LHS, KnownZero, KnownOne, Depth + 1);

				    // Drop anything above bit 31 first so that it cannot be shifted down
				    // into the 32-bit result (e.g. from a sign-extended immediate).
				    KnownZero = (KnownZero & Mask32) >> ShiftAmt;
				    KnownOne = (KnownOne & Mask32) >> ShiftAmt;

				    // The top ShiftAmt bits of the 32-bit result are filled with zeros.
				    KnownZero |= Mask32 & ~(Mask32 >> ShiftAmt);
				    return;
				  }
				  default:
				    // Arithmetic shifts and any other opcode: report nothing known.
				    return;
				  }
				}

				void SIInstrInfo::computeKnownBits(const MachineRegisterInfo &MRI,
				const MachineOperand &Op,
				uint64_t &KnownZero,
				uint64_t &KnownOne,
				unsigned Depth) const {
				if (Depth > 6)
				return;

				uint64_t ImmVal;
				if (isImmOrMaterializedImm(MRI, Op, ImmVal)) {
				KnownOne = ImmVal;
				KnownZero = ~ImmVal;
				return;
				}

				if (!Op.isReg())
				return;

				unsigned Reg = Op.getReg();
				const MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
				if (!Def)
				return;

				switch (Def->getOpcode()) {
				case AMDGPU::COPY: {
				const MachineOperand &Src = Def->getOperand(1);
				if (Src.getSubReg() != AMDGPU::NoSubRegister)
				return;

				computeKnownBits(MRI, Src, KnownZero, KnownOne, Depth + 1);
				return;
				}
				case AMDGPU::S_AND_B32:
				case AMDGPU::V_AND_B32_e64:
				case AMDGPU::V_AND_B32_e32: {
				const MachineOperand &LHS = Def->getOperand(1);
				const MachineOperand &RHS = Def->getOperand(2);

				uint64_t KnownZero2, KnownOne2;

				// If either the LHS or the RHS are Zero, the result is zero.
				computeKnownBits(MRI, LHS, KnownZero2, KnownOne2, Depth + 1);
				computeKnownBits(MRI, RHS, KnownZero, KnownOne, Depth + 1);

				// Output known-1 bits are only known if set in both the LHS & RHS.
				KnownOne &= KnownOne2;

				// Output known-0 are known to be clear if zero in either the LHS \| RHS.
				KnownZero \|= KnownZero2;
				return;
				}
				case AMDGPU::V_LSHLREV_B32_e64:
				case AMDGPU::V_LSHRREV_B32_e64:
				case AMDGPU::V_ASHRREV_I32_e64:
				case AMDGPU::V_LSHLREV_B32_e32:
				case AMDGPU::V_LSHRREV_B32_e32:
				case AMDGPU::V_ASHRREV_I32_e32: {
				computeKnownBitsShift(this, MRI, Def->getOpcode(),
				Def->getOperand(2), Def->getOperand(1),
				KnownZero, KnownOne, Depth);
				return;
				}
				case AMDGPU::S_LSHL_B32:
				case AMDGPU::S_ASHR_I32:
				case AMDGPU::S_LSHR_B32:
				case AMDGPU::V_LSHL_B32_e64:
				case AMDGPU::V_LSHR_B32_e64:
				case AMDGPU::V_ASHR_I32_e64:
				case AMDGPU::V_LSHL_B32_e32:
				case AMDGPU::V_LSHR_B32_e32:
				case AMDGPU::V_ASHR_I32_e32: {
				computeKnownBitsShift(this, MRI, Def->getOpcode(),
				Def->getOperand(1), Def->getOperand(2),
				KnownZero, KnownOne, Depth);
				return;
				}
				default:
				return;
				}
				}