Index: llvm/lib/Target/AMDGPU/AMDGPU.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPU.td +++ llvm/lib/Target/AMDGPU/AMDGPU.td @@ -87,12 +87,6 @@ "Have VALU add/sub instructions without carry out" >; -def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access", - "UnalignedBufferAccess", - "true", - "Support unaligned global loads and stores" ->; - def FeatureTrapHandler: SubtargetFeature<"trap-handler", "TrapHandler", "true", @@ -105,12 +99,12 @@ "Support unaligned scratch loads and stores" >; -// LDS alignment enforcement is controlled by a configuration register: +// Alignment enforcement is controlled by a configuration register: // SH_MEM_CONFIG.alignment_mode def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode", "UnalignedAccessMode", "true", - "Support unaligned local and region loads and stores" + "Support unaligned global, local and region loads and stores" >; def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access", Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -970,9 +970,9 @@ return (Align >= RoundedSize); }; - unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32; - unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16; - unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8; + unsigned GlobalAlign32 = ST.hasUnalignedAccessMode() ? 0 : 32; + unsigned GlobalAlign16 = ST.hasUnalignedAccessMode() ? 0 : 16; + unsigned GlobalAlign8 = ST.hasUnalignedAccessMode() ? 0 : 8; // TODO: Refine based on subtargets which support unaligned access or 128-bit // LDS Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -81,7 +81,7 @@ SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. - FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; + FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,"; FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -87,7 +87,6 @@ AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal, AMDGPU::FeaturePromoteAlloca, - AMDGPU::FeatureUnalignedBufferAccess, AMDGPU::FeatureUnalignedScratchAccess, AMDGPU::FeatureUnalignedAccessMode, Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1416,7 +1416,7 @@ return AlignedBy4; } - if (Subtarget->hasUnalignedBufferAccess() && + if (Subtarget->hasUnalignedAccessMode() && !(AddrSpace == AMDGPUAS::LOCAL_ADDRESS || AddrSpace == AMDGPUAS::REGION_ADDRESS)) { // If we have an uniform constant load, it still requires using a slow Index: llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s Index: llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -1,10 +1,10 @@ ; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s +; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s ; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s -; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s +; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s Index: llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX900 %s define <2 x half> @chain_hi_to_lo_private() { ; GCN-LABEL: chain_hi_to_lo_private: Index: llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll +++ llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; Should not merge this to a dword load define i32 @global_load_2xi16_align2(i16 addrspace(1)* %p) #0 { Index: llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll +++ llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-access-mode -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s ; SI-LABEL: {{^}}local_unaligned_load_store_i16: Index: llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir +++ llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir @@ -1,5 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s -# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s +# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s # FIXME: This overrides attributes that already are present. It should probably # only touch functions without an existing attribute. @@ -10,8 +10,8 @@ # MCPU: attributes #0 = { "target-cpu"="fiji" } # MCPU: attributes #1 = { "target-cpu"="hawaii" } -# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-buffer-access" } -# MATTR: attributes #1 = { "target-features"="+unaligned-buffer-access" } +# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-access-mode" } +# MATTR: attributes #1 = { "target-features"="+unaligned-access-mode" } --- | define amdgpu_kernel void @with_cpu_attr() #0 { Index: llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir +++ llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir @@ -1,10 +1,10 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s -# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s +# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s # The command line arguments for -mcpu and -mattr should manifest themselves by adding the corresponding attributes to the stub IR function. # MCPU: attributes #0 = { "target-cpu"="hawaii" } -# MATTR: attributes #0 = { "target-features"="+unaligned-buffer-access" } +# MATTR: attributes #0 = { "target-features"="+unaligned-access-mode" } --- name: no_ir Index: llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll =================================================================== --- llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll +++ llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll @@ -1,7 +1,7 @@ -; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s -; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s -; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s -; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s +; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s +; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s +; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s +; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s target triple = "amdgcn--" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"