Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -98,6 +98,16 @@ "true", "Enable SI load/store optimizer pass">; +// Performance debugging feature. Allow using DS instruction immediate +// offsets even if the base pointer can't be proven to be base. On SI, +// base pointer values that won't give the same result as a 16-bit add +// are not safe to fold, but this will override the conservative test +// for the base pointer. +def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding", + "EnableUnsafeDSOffsetFolding", + "true", + "Force using DS instruction immediate offsets on SI">; + def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "FlatAddressSpace", "true", Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -859,7 +859,8 @@ (OffsetBits == 8 && !isUInt<8>(Offset))) return false; - if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || + Subtarget->unsafeDSOffsetFoldingEnabled()) return true; // On Southern Islands instruction with a negative base value and an offset Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -76,6 +76,7 @@ bool EnablePromoteAlloca; bool EnableIfCvt; bool EnableLoadStoreOpt; + bool EnableUnsafeDSOffsetFolding; unsigned WavefrontSize; bool CFALUBug; int LocalMemorySize; @@ -222,6 +223,10 @@ return EnableLoadStoreOpt; } + bool unsafeDSOffsetFoldingEnabled() const { + return EnableUnsafeDSOffsetFolding; + } + unsigned getWavefrontSize() const { return WavefrontSize; } Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -69,6 +69,7 @@ FP64Denormals(false), FP32Denormals(false), FastFMAF32(false), CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), + EnableUnsafeDSOffsetFolding(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), Index: test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll =================================================================== --- test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s declare i32 @llvm.r600.read.tidig.x() #0 declare void @llvm.AMDGPU.barrier.local() #1