Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -751,6 +751,10 @@
       CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
 
   bool isBasicBlockPrologue(const MachineInstr &MI) const override;
+
+  void computeKnownBits(const MachineRegisterInfo &MRI, const MachineOperand &Op,
+                        uint64_t &KnownZero, uint64_t &KnownOne,
+                        unsigned Depth = 0) const;
 };
 
 namespace AMDGPU {
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3884,3 +3884,159 @@
   return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
          MI.modifiesRegister(AMDGPU::EXEC, &RI);
 }
+
+static bool isImmOrMaterializedImm(const MachineRegisterInfo &MRI,
+                                   const MachineOperand &Op, uint64_t &Imm) {
+  if (Op.isImm()) {
+    Imm = Op.getImm();
+    return true;
+  }
+
+  if (!Op.isReg())
+    return false;
+
+  // Look through a materializing move of an immediate into a register.
+  const MachineInstr *Def = MRI.getUniqueVRegDef(Op.getReg());
+  if (!Def || !Def->isMoveImmediate())
+    return false;
+
+  const MachineOperand &Src = Def->getOperand(1);
+  if (!Src.isImm())
+    return false;
+
+  Imm = Src.getImm();
+  return true;
+}
+
+// Compute known bits for a 32-bit shift whose amount is a (possibly
+// materialized) immediate. LHS is the shifted value, RHS the shift amount.
+static void computeKnownBitsShift(const SIInstrInfo *TII,
+                                  const MachineRegisterInfo &MRI,
+                                  unsigned Opcode,
+                                  const MachineOperand &LHS,
+                                  const MachineOperand &RHS,
+                                  uint64_t &KnownZero, uint64_t &KnownOne,
+                                  unsigned Depth) {
+  uint64_t ShiftAmt;
+  if (!isImmOrMaterializedImm(MRI, RHS, ShiftAmt))
+    return;
+
+  // The hardware only reads the low 5 bits of a 32-bit shift amount; this
+  // also keeps the host-side shifts below well defined.
+  ShiftAmt &= 31;
+
+  switch (Opcode) {
+  case AMDGPU::S_LSHL_B32:
+  case AMDGPU::V_LSHLREV_B32_e64:
+  case AMDGPU::V_LSHLREV_B32_e32:
+  case AMDGPU::V_LSHL_B32_e64:
+  case AMDGPU::V_LSHL_B32_e32: {
+    TII->computeKnownBits(MRI, LHS, KnownZero, KnownOne, Depth + 1);
+
+    KnownZero <<= ShiftAmt;
+    KnownOne <<= ShiftAmt;
+
+    // The low ShiftAmt bits are known zero.
+    KnownZero |= (UINT64_C(1) << ShiftAmt) - 1;
+    return;
+  }
+  case AMDGPU::S_LSHR_B32:
+  case AMDGPU::V_LSHRREV_B32_e64:
+  case AMDGPU::V_LSHRREV_B32_e32:
+  case AMDGPU::V_LSHR_B32_e64:
+  case AMDGPU::V_LSHR_B32_e32: {
+    TII->computeKnownBits(MRI, LHS, KnownZero, KnownOne, Depth + 1);
+
+    KnownZero >>= ShiftAmt;
+    KnownOne >>= ShiftAmt;
+
+    // For a logical 32-bit right shift, everything at and above bit
+    // (32 - ShiftAmt) is known zero.
+    KnownZero |= ~UINT64_C(0) << (32 - ShiftAmt);
+    return;
+  }
+  default:
+    // Arithmetic shifts (and anything else) conservatively report nothing
+    // known; their high bits depend on the unknown sign bit.
+    return;
+  }
+}
+
+void SIInstrInfo::computeKnownBits(const MachineRegisterInfo &MRI,
+                                   const MachineOperand &Op,
+                                   uint64_t &KnownZero, uint64_t &KnownOne,
+                                   unsigned Depth) const {
+  // Start from "nothing known" so every early return below is conservatively
+  // correct and callers never observe uninitialized output bits.
+  KnownZero = 0;
+  KnownOne = 0;
+
+  if (Depth > 6)
+    return;
+
+  uint64_t ImmVal;
+  if (isImmOrMaterializedImm(MRI, Op, ImmVal)) {
+    KnownOne = ImmVal;
+    KnownZero = ~ImmVal;
+    return;
+  }
+
+  if (!Op.isReg())
+    return;
+
+  const MachineInstr *Def = MRI.getUniqueVRegDef(Op.getReg());
+  if (!Def)
+    return;
+
+  switch (Def->getOpcode()) {
+  case AMDGPU::COPY: {
+    const MachineOperand &Src = Def->getOperand(1);
+    // A subregister copy transfers only part of the source; give up.
+    if (Src.getSubReg() != AMDGPU::NoSubRegister)
+      return;
+
+    computeKnownBits(MRI, Src, KnownZero, KnownOne, Depth + 1);
+    return;
+  }
+  case AMDGPU::S_AND_B32:
+  case AMDGPU::V_AND_B32_e64:
+  case AMDGPU::V_AND_B32_e32: {
+    uint64_t KnownZeroLHS = 0, KnownOneLHS = 0;
+    computeKnownBits(MRI, Def->getOperand(1), KnownZeroLHS, KnownOneLHS,
+                     Depth + 1);
+    computeKnownBits(MRI, Def->getOperand(2), KnownZero, KnownOne, Depth + 1);
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOneLHS;
+
+    // Output known-0 bits are known if clear in either the LHS | RHS.
+    KnownZero |= KnownZeroLHS;
+    return;
+  }
+  case AMDGPU::V_LSHLREV_B32_e64:
+  case AMDGPU::V_LSHRREV_B32_e64:
+  case AMDGPU::V_ASHRREV_I32_e64:
+  case AMDGPU::V_LSHLREV_B32_e32:
+  case AMDGPU::V_LSHRREV_B32_e32:
+  case AMDGPU::V_ASHRREV_I32_e32: {
+    // The "REV" forms take the shift amount as src0 and the value as src1.
+    computeKnownBitsShift(this, MRI, Def->getOpcode(), Def->getOperand(2),
+                          Def->getOperand(1), KnownZero, KnownOne, Depth);
+    return;
+  }
+  case AMDGPU::S_LSHL_B32:
+  case AMDGPU::S_ASHR_I32:
+  case AMDGPU::S_LSHR_B32:
+  case AMDGPU::V_LSHL_B32_e64:
+  case AMDGPU::V_LSHR_B32_e64:
+  case AMDGPU::V_ASHR_I32_e64:
+  case AMDGPU::V_LSHL_B32_e32:
+  case AMDGPU::V_LSHR_B32_e32:
+  case AMDGPU::V_ASHR_I32_e32: {
+    computeKnownBitsShift(this, MRI, Def->getOpcode(), Def->getOperand(1),
+                          Def->getOperand(2), KnownZero, KnownOne, Depth);
+    return;
+  }
+  default:
+    return;
+  }
+}