Index: llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -81,6 +81,11 @@ /// Get the definition register of the loaded value. Register getDstReg() const { return getOperand(0).getReg(); } + /// Returns the Ranges that describes the dereference. + const MDNode *getRanges() const { + return getMMO().getRanges(); + } + static bool classof(const MachineInstr *MI) { switch (MI->getOpcode()) { case TargetOpcode::G_LOAD: Index: llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Module.h" #define DEBUG_TYPE "gisel-known-bits" @@ -604,6 +606,36 @@ return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits); } +/// Compute the known number of sign bits with attached range metadata in the +/// memory operand. If this is an extending load, accounts for the behavior of +/// the high bits. 
+static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld, + unsigned TyBits) { + const MDNode *Ranges = Ld->getRanges(); + if (!Ranges) + return 1; + + ConstantRange CR = getConstantRangeFromMetadata(*Ranges); + if (TyBits > CR.getBitWidth()) { + switch (Ld->getOpcode()) { + case TargetOpcode::G_SEXTLOAD: + CR = CR.signExtend(TyBits); + break; + case TargetOpcode::G_ZEXTLOAD: + CR = CR.zeroExtend(TyBits); + break; + default: + break; + } + } + + if (TyBits != CR.getBitWidth()) + return 1; + + return std::min(CR.getSignedMin().getNumSignBits(), + CR.getSignedMax().getNumSignBits()); +} + unsigned GISelKnownBits::computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth) { @@ -655,20 +687,39 @@ unsigned InRegBits = TyBits - SrcBits + 1; return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits); } + case TargetOpcode::G_LOAD: { + GLoad *Ld = cast<GLoad>(&MI); + if (DemandedElts != 1 || !getDataLayout().isLittleEndian()) + break; + + return computeNumSignBitsFromRangeMetadata(Ld, TyBits); + } case TargetOpcode::G_SEXTLOAD: { + GSExtLoad *Ld = cast<GSExtLoad>(&MI); + // FIXME: We need an in-memory type representation. if (DstTy.isVector()) return 1; + unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits); + if (NumBits != 1) + return NumBits; + // e.g. i16->i32 = '17' bits known. const MachineMemOperand *MMO = *MI.memoperands_begin(); return TyBits - MMO->getSizeInBits() + 1; } case TargetOpcode::G_ZEXTLOAD: { + GZExtLoad *Ld = cast<GZExtLoad>(&MI); + // FIXME: We need an in-memory type representation. if (DstTy.isVector()) return 1; + unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits); + if (NumBits != 1) + return NumBits; + // e.g. i16->i32 = '16' bits known. 
const MachineMemOperand *MMO = *MI.memoperands_begin(); return TyBits - MMO->getSizeInBits(); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/load-range-metadata-signbits.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/load-range-metadata-signbits.mir @@ -0,0 +1,105 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# Test interpretation of !range metadata for a scalar memory type with a vector result register. +# FIXME: MIR handling of range metadata is broken + +--- | + define void @range_metadata_sext_i8_signed_range_i64_load_as_v2i32() { + ret void + } + + define void @range_metadata_sext_i8_signed_range_i64_load_as_v2i32_extractlo() { + ret void + } + + define void @range_metadata_sext_i33_signed_range_i64_load_as_v2i32() { + ret void + } + + !0 = !{i64 -4294967295, i64 4294967296} + !1 = !{i64 -8589934591, i64 8589934592} + +... 
+--- +name: range_metadata_sext_i8_signed_range_i64_load_as_v2i32 +alignment: 1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: range_metadata_sext_i8_signed_range_i64_load_as_v2i32 + ; GCN: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile load (s64), align 4, !range + ; GCN-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<2 x s32>) = G_SEXT_INREG [[LOAD]], 9 + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](<2 x s32>) + ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %0:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %3:_(<2 x s32>) = G_LOAD %0(p1) :: (volatile load (s64), align 4, !range !0, addrspace 1) + %6:_(<2 x s32>) = G_SEXT_INREG %3, 9 + $vgpr0_vgpr1 = COPY %6 + SI_RETURN implicit $vgpr0_vgpr1 + +... 
+ +--- +name: range_metadata_sext_i8_signed_range_i64_load_as_v2i32_extractlo +alignment: 1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: range_metadata_sext_i8_signed_range_i64_load_as_v2i32_extractlo + ; GCN: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile load (s64), align 4, !range + ; GCN-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: $vgpr0 = COPY %zero(s32) + ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %0:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %3:_(<2 x s32>) = G_LOAD %0(p1) :: (volatile load (s64), align 4, !range !0, addrspace 1) + %zero:_(s32) = G_CONSTANT i32 0 + %extract:_(s32) = G_EXTRACT_VECTOR_ELT %3, %zero + %6:_(s32) = G_SEXT_INREG %zero, 9 + $vgpr0 = COPY %6 + SI_RETURN implicit $vgpr0, implicit $vgpr1 + +... 
+ +--- +name: range_metadata_sext_i33_signed_range_i64_load_as_v2i32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: range_metadata_sext_i33_signed_range_i64_load_as_v2i32 + ; GCN: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile load (s64), align 4, !range + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %0:_(p1) = G_MERGE_VALUES %1(s32), %2(s32) + %3:_(<2 x s32>) = G_LOAD %0(p1) :: (volatile load (s64), align 4, !range !1, addrspace 1) + $vgpr0_vgpr1 = COPY %3 + SI_RETURN implicit $vgpr0_vgpr1 + +... Index: llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll +++ llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GISEL %s + define i32 @range_metadata_sext_i8_signed_range_i32(ptr addrspace(1) %ptr) { ; GCN-LABEL: range_metadata_sext_i8_signed_range_i32: @@ -43,13 +45,21 @@ } define i32 @range_metadata_sext_i8_neg_neg_range_i32(ptr addrspace(1) %ptr) { -; GCN-LABEL: range_metadata_sext_i8_neg_neg_range_i32: -; GCN: 
; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: global_load_dword v0, v[0:1], off glc -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v0, 63, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; SDAG-LABEL: range_metadata_sext_i8_neg_neg_range_i32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: global_load_dword v0, v[0:1], off glc +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 63, v0 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: range_metadata_sext_i8_neg_neg_range_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: global_load_dword v0, v[0:1], off glc +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0 +; GISEL-NEXT: s_setpc_b64 s[30:31] %val = load volatile i32, ptr addrspace(1) %ptr, align 4, !range !3 %shl = shl i32 %val, 25 %ashr = ashr i32 %shl, 25 @@ -98,14 +108,23 @@ } define i64 @range_metadata_sext_i8_signed_range_i64(ptr addrspace(1) %ptr) { -; GCN-LABEL: range_metadata_sext_i8_signed_range_i64: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v1, 23, v0 -; GCN-NEXT: v_ashrrev_i64 v[0:1], 55, v[0:1] -; GCN-NEXT: s_setpc_b64 s[30:31] +; SDAG-LABEL: range_metadata_sext_i8_signed_range_i64: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: v_lshlrev_b32_e32 v1, 23, v0 +; SDAG-NEXT: v_ashrrev_i64 v[0:1], 55, v[0:1] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: range_metadata_sext_i8_signed_range_i64: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: v_bfe_i32 v0, v0, 0, 9 +; GISEL-NEXT: 
v_ashrrev_i32_e32 v1, 31, v0 +; GISEL-NEXT: s_setpc_b64 s[30:31] %val = load volatile i64, ptr addrspace(1) %ptr, align 4, !range !7 %shl = shl i64 %val, 55 %ashr = ashr i64 %shl, 55