Index: llvm/lib/Target/AMDGPU/AMDGPU.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPU.td +++ llvm/lib/Target/AMDGPU/AMDGPU.td @@ -94,7 +94,13 @@ >; def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access", - "UnalignedBufferAccess", + "EnableUnalignedBufferAccess", + "true", + "Enable unaligned global loads and stores if hardware supports it" +>; + +def FeatureSupportUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access-support", + "SupportsUnalignedBufferAccess", "true", "Hardware supports unaligned global loads and stores" >; @@ -112,7 +118,13 @@ >; def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access", - "UnalignedDSAccess", + "EnableUnalignedDSAccess", + "true", + "Enable unaligned local and region loads and stores if hardware supports it" +>; + +def FeatureSupportUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access-support", + "SupportsUnalignedDSAccess", "true", "Hardware supports unaligned local and region loads and stores" >; @@ -692,15 +704,6 @@ "Requires use of fract on arguments to trig instructions" >; -// Alignment enforcement is controlled by a configuration register: -// SH_MEM_CONFIG.alignment_mode -def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode", - "UnalignedAccessMode", - "true", - "Enable unaligned global, local and region loads and stores if the hardware" - " supports it" ->; - def FeaturePackedTID : SubtargetFeature<"packed-tid", "HasPackedTID", "true", @@ -733,7 +736,8 @@ FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, - FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess + FeatureDsSrc2Insts, FeatureExtendedImageInsts, + FeatureSupportUnalignedBufferAccess ] >; @@ -748,7 +752,7 @@ FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, - FeatureUnalignedBufferAccess + FeatureSupportUnalignedBufferAccess ] >; @@ -765,7 +769,7 @@ FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, - FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess + FeatureSupportUnalignedBufferAccess, FeatureSupportUnalignedDSAccess ] >; @@ -784,7 +788,7 @@ FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureGFX10A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16, - FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess + FeatureSupportUnalignedBufferAccess, FeatureSupportUnalignedDSAccess ] >; @@ -1413,8 +1417,8 @@ def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">; -def HasUnalignedAccessMode : Predicate<"Subtarget->hasUnalignedAccessMode()">, - AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>; +def HasUnalignedDSAccess : Predicate<"Subtarget->hasUnalignedDSAccessEnabled()">, + AssemblerPredicate<(all_of FeatureUnalignedDSAccess)>; // Include AMDGPU TD files include "SISchedule.td" Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -93,7 +93,7 @@ // Turn on features that HSA ABI requires. Also turn on FlatForGlobal by default if (isAmdHsaOS()) - FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,"; + FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS @@ -213,7 +213,8 @@ FlatForGlobal(false), AutoWaitcntBeforeBarrier(false), UnalignedScratchAccess(false), - UnalignedAccessMode(false), + EnableUnalignedBufferAccess(false), + EnableUnalignedDSAccess(false), HasApertureRegs(false), SupportsXNACK(false), @@ -289,8 +290,8 @@ HasUnpackedD16VMem(false), LDSMisalignedBug(false), HasMFMAInlineLiteralBug(false), - UnalignedBufferAccess(false), - UnalignedDSAccess(false), + SupportsUnalignedBufferAccess(false), + SupportsUnalignedDSAccess(false), HasPackedTID(false), ScalarizeGlobal(false), Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -262,7 +262,7 @@ AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler, AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal, AMDGPU::FeaturePromoteAlloca, AMDGPU::FeatureUnalignedScratchAccess, - AMDGPU::FeatureUnalignedAccessMode, + AMDGPU::FeatureUnalignedBufferAccess, AMDGPU::FeatureUnalignedDSAccess, AMDGPU::FeatureAutoWaitcntBeforeBarrier, Index: llvm/lib/Target/AMDGPU/DSInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/DSInstructions.td +++ llvm/lib/Target/AMDGPU/DSInstructions.td @@ -728,7 +728,7 @@ defm : DSReadPat_mc ; } -let SubtargetPredicate = HasUnalignedAccessMode in { +let SubtargetPredicate = HasUnalignedDSAccess in { foreach vt = VReg_96.RegTypes in { defm : DSReadPat_mc ; @@ -738,7 +738,7 @@ defm : DSReadPat_mc ; } -} // End SubtargetPredicate = HasUnalignedAccessMode +} // End SubtargetPredicate = HasUnalignedDSAccess } // End SubtargetPredicate = isGFX7Plus @@ -883,7 +883,7 @@ defm : DSWritePat_mc ; } -let SubtargetPredicate = HasUnalignedAccessMode in { +let SubtargetPredicate = HasUnalignedDSAccess in { foreach vt = VReg_96.RegTypes in { defm : DSWritePat_mc ; @@ -893,7 +893,7 @@ defm : DSWritePat_mc ; } -} // End SubtargetPredicate = HasUnalignedAccessMode +} // End SubtargetPredicate = HasUnalignedDSAccess } // End SubtargetPredicate = isGFX7Plus Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -88,7 +88,8 @@ bool FlatForGlobal; bool AutoWaitcntBeforeBarrier; bool UnalignedScratchAccess; - bool UnalignedAccessMode; + bool EnableUnalignedBufferAccess; + bool EnableUnalignedDSAccess; bool HasApertureRegs; bool SupportsXNACK; @@ -181,8 +182,8 @@ bool HasMFMAInlineLiteralBug; bool HasVertexCache; short TexVTXClauseSize; - bool UnalignedBufferAccess; - bool UnalignedDSAccess; + bool SupportsUnalignedBufferAccess; + bool SupportsUnalignedDSAccess; bool HasPackedTID; bool ScalarizeGlobal; @@ -492,29 +493,25 @@ } bool hasUnalignedBufferAccess() const { - return UnalignedBufferAccess; + return SupportsUnalignedBufferAccess; } bool hasUnalignedBufferAccessEnabled() const { - return UnalignedBufferAccess && UnalignedAccessMode; + return SupportsUnalignedBufferAccess && EnableUnalignedBufferAccess; } bool hasUnalignedDSAccess() const { - return UnalignedDSAccess; + return SupportsUnalignedDSAccess; } bool hasUnalignedDSAccessEnabled() const { - return UnalignedDSAccess && UnalignedAccessMode; + return SupportsUnalignedDSAccess && EnableUnalignedDSAccess; } bool hasUnalignedScratchAccess() const { return UnalignedScratchAccess; } - bool hasUnalignedAccessMode() const { - return UnalignedAccessMode; - } - bool hasApertureRegs() const { return HasApertureRegs; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,ALIGNED %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,UNALIGNED %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode,+unaligned-ds-access < %s | FileCheck -check-prefixes=GCN,UNALIGNED %s ; GCN-LABEL: test_local_misaligned_v2: ; GCN-DAG: ds_read2_b32 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -3,8 +3,8 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -mattr=-enable-ds128 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -mattr=+enable-ds128 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI-DS128 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -mattr=-unaligned-access-mode -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -mattr=+unaligned-access-mode -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9-UNALIGNED %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -mattr=-unaligned-ds-access -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -mattr=+unaligned-ds-access -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9-UNALIGNED %s --- name: test_load_local_s1_align1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s Index: llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-ds-access -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-ds-access -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s ; Unaligned DS access in available from GFX9 onwards. ; LDS alignment enforcement is controlled by a configuration register: Index: llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -1,10 +1,10 @@ ; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s +; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s ; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s +; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s Index: llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode -amdgpu-enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-buffer-access -amdgpu-enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s define <2 x half> @chain_hi_to_lo_private() { ; GFX900-LABEL: chain_hi_to_lo_private: Index: llvm/test/CodeGen/AMDGPU/ds_read2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope --check-prefix=CI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-ALIGNED %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-UNALIGNED %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-ds-access < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-ALIGNED %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-ds-access < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-UNALIGNED %s ; FIXME: We don't get cases where the address was an SGPR because we ; get a copy to the address register for each one. Index: llvm/test/CodeGen/AMDGPU/ds_write2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ds_write2.ll +++ llvm/test/CodeGen/AMDGPU/ds_write2.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope --check-prefix=CI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-ALIGNED %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-UNALIGNED %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-ds-access < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-ALIGNED %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-ds-access < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-UNALIGNED %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 Index: llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll +++ llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck --check-prefix=GFX7-ALIGNED %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck --check-prefix=GFX7-UNALIGNED %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck --check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck --check-prefix=GFX7-ALIGNED %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck --check-prefix=GFX7-UNALIGNED %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck --check-prefix=GFX9 %s ; Should not merge this to a dword load define i32 @global_load_2xi16_align2(i16 addrspace(1)* %p) #0 { Index: llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll +++ llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED,SPLIT %s ; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED,SPLIT %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,ALIGNED,VECT %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,UNALIGNED,VECT %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode,+unaligned-ds-access < %s | FileCheck -check-prefixes=GCN,UNALIGNED,VECT %s ; GCN-LABEL: test_local_misaligned_v2: ; GCN-DAG: ds_read2_b32 Index: llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll +++ llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefixes=SI,MUBUF,ALIGNED %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-access-mode -verify-machineinstrs< %s | FileCheck -check-prefixes=SI,MUBUF,UNALIGNED %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefixes=SI,MUBUF,UNALIGNED %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefixes=SI,MUBUF,ALIGNED %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,FLATSCR,ALIGNED %s Index: llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir +++ llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir @@ -1,5 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s -# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s +# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s # FIXME: This overrides attributes that already are present. It should probably # only touch functions without an existing attribute. @@ -10,8 +10,8 @@ # MCPU: attributes #0 = { "target-cpu"="fiji" } # MCPU: attributes #1 = { "target-cpu"="hawaii" } -# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-access-mode" } -# MATTR: attributes #1 = { "target-features"="+unaligned-access-mode" } +# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-buffer-access" } +# MATTR: attributes #1 = { "target-features"="+unaligned-buffer-access" } --- | define amdgpu_kernel void @with_cpu_attr() #0 { Index: llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir +++ llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir @@ -1,10 +1,10 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s -# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s +# RUN: llc -march=amdgcn -mattr=+unaligned-ds-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s # The command line arguments for -mcpu and -mattr should manifest themselves by adding the corresponding attributes to the stub IR function. # MCPU: attributes #0 = { "target-cpu"="hawaii" } -# MATTR: attributes #0 = { "target-features"="+unaligned-access-mode" } +# MATTR: attributes #0 = { "target-features"="+unaligned-ds-access" } --- name: no_ir Index: llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll =================================================================== --- llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll +++ llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll @@ -1,7 +1,7 @@ -; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s -; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s -; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s -; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s +; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s +; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s +; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s +; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s target triple = "amdgcn--" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" Index: llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll =================================================================== --- llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll +++ llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN,GFX7 %s -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" @@ -31,20 +31,13 @@ ; GCN-LABEL: @interleave_get_longest -; GFX7: load <2 x i32> -; GFX7: load i32 -; GFX7: store <2 x i32> zeroinitializer -; GFX7: load i32 -; GFX7: load <2 x i32> -; GFX7: load i32 -; GFX7: load i32 - -; GFX9: load <4 x i32> -; GFX9: load i32 -; GFX9: store <2 x i32> zeroinitializer -; GFX9: load i32 -; GFX9: load i32 -; GFX9: load i32 +; GCN: load <2 x i32> +; GCN: load i32 +; GCN: store <2 x i32> zeroinitializer +; GCN: load i32 +; GCN: load <2 x i32> +; GCN: load i32 +; GCN: load i32 define amdgpu_kernel void @interleave_get_longest(i32 %arg) { %a1 = add i32 %arg, 1