Index: llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -81,6 +81,11 @@
   /// Get the definition register of the loaded value.
   Register getDstReg() const { return getOperand(0).getReg(); }
 
+  /// Returns the range metadata of the memory operand, or nullptr if absent.
+  const MDNode *getRanges() const {
+    return getMMO().getRanges();
+  }
+
   static bool classof(const MachineInstr *MI) {
     switch (MI->getOpcode()) {
     case TargetOpcode::G_LOAD:
Index: llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -12,11 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Module.h"
 
 #define DEBUG_TYPE "gisel-known-bits"
@@ -604,6 +606,36 @@
   return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
 }
 
+/// Compute the number of known sign bits of a load result from the !range
+/// metadata on its memory operand, widening the range to \p TyBits for sext
+/// and zext loads. Returns 1 (no information) if the range cannot be used.
+static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
+                                                    unsigned TyBits) {
+  const MDNode *Ranges = Ld->getRanges();
+  if (!Ranges)
+    return 1;
+
+  ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
+  if (TyBits > CR.getBitWidth()) {
+    switch (Ld->getOpcode()) {
+    case TargetOpcode::G_SEXTLOAD:
+      CR = CR.signExtend(TyBits);
+      break;
+    case TargetOpcode::G_ZEXTLOAD:
+      CR = CR.zeroExtend(TyBits);
+      break;
+    default:
+      break;
+    }
+  }
+
+  if (TyBits != CR.getBitWidth())
+    return 1;
+
+  return std::min(CR.getSignedMin().getNumSignBits(),
+                  CR.getSignedMax().getNumSignBits());
+}
+
 unsigned GISelKnownBits::computeNumSignBits(Register R,
                                             const APInt &DemandedElts,
                                             unsigned Depth) {
@@ -655,20 +687,39 @@
     unsigned InRegBits = TyBits - SrcBits + 1;
     return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
   }
+  case TargetOpcode::G_LOAD: {
+    GLoad *Ld = cast<GLoad>(&MI);
+    if (DemandedElts != 1)
+      break;
+
+    return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
+  }
   case TargetOpcode::G_SEXTLOAD: {
+    GSExtLoad *Ld = cast<GSExtLoad>(&MI);
+
     // FIXME: We need an in-memory type representation.
     if (DstTy.isVector())
       return 1;
 
+    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
+    if (NumBits != 1)
+      return NumBits;
+
     // e.g. i16->i32 = '17' bits known.
     const MachineMemOperand *MMO = *MI.memoperands_begin();
     return TyBits - MMO->getSizeInBits() + 1;
   }
   case TargetOpcode::G_ZEXTLOAD: {
+    GZExtLoad *Ld = cast<GZExtLoad>(&MI);
+
     // FIXME: We need an in-memory type representation.
     if (DstTy.isVector())
       return 1;
 
+    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
+    if (NumBits != 1)
+      return NumBits;
+
     // e.g. i16->i32 = '16' bits known.
     const MachineMemOperand *MMO = *MI.memoperands_begin();
     return TyBits - MMO->getSizeInBits();
Index: llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
+++ llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GISEL %s
+
 
 define i32 @range_metadata_sext_i8_signed_range_i32(ptr addrspace(1) %ptr) {
 ; GCN-LABEL: range_metadata_sext_i8_signed_range_i32:
@@ -43,13 +45,21 @@
 }
 
 define i32 @range_metadata_sext_i8_neg_neg_range_i32(ptr addrspace(1) %ptr) {
-; GCN-LABEL: range_metadata_sext_i8_neg_neg_range_i32:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    global_load_dword v0, v[0:1], off glc
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_and_b32_e32 v0, 63, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-LABEL: range_metadata_sext_i8_neg_neg_range_i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    global_load_dword v0, v[0:1], off glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 63, v0
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: range_metadata_sext_i8_neg_neg_range_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    global_load_dword v0, v[0:1], off glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v0
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
   %val = load volatile i32, ptr addrspace(1) %ptr, align 4, !range !3
   %shl = shl i32 %val, 25
   %ashr = ashr i32 %shl, 25
@@ -98,14 +108,23 @@
 }
 
 define i64 @range_metadata_sext_i8_signed_range_i64(ptr addrspace(1) %ptr) {
-; GCN-LABEL: range_metadata_sext_i8_signed_range_i64:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off glc
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_lshlrev_b32_e32 v1, 23, v0
-; GCN-NEXT:    v_ashrrev_i64 v[0:1], 55, v[0:1]
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-LABEL: range_metadata_sext_i8_signed_range_i64:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 23, v0
+; SDAG-NEXT:    v_ashrrev_i64 v[0:1], 55, v[0:1]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: range_metadata_sext_i8_signed_range_i64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 9
+; GISEL-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
   %val = load volatile i64, ptr addrspace(1) %ptr, align 4, !range !7
   %shl = shl i64 %val, 55
   %ashr = ashr i64 %shl, 55