diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -90,7 +90,7 @@
 def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
   "UnalignedBufferAccess",
   "true",
-  "Support unaligned global loads and stores"
+  "Hardware supports unaligned global loads and stores"
 >;
 
 def FeatureTrapHandler: SubtargetFeature<"trap-handler",
@@ -105,18 +105,10 @@
   "Support unaligned scratch loads and stores"
 >;
 
-// LDS alignment enforcement is controlled by a configuration register:
-// SH_MEM_CONFIG.alignment_mode
-def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
-  "UnalignedAccessMode",
-  "true",
-  "Support unaligned local and region loads and stores"
->;
-
 def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access",
   "UnalignedDSAccess",
   "true",
-  "Does not requires 16 byte alignment for certain local and region loads and stores"
+  "Hardware supports unaligned local and region loads and stores"
 >;
 
 def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
@@ -653,6 +645,15 @@
   "Requires use of fract on arguments to trig instructions"
 >;
 
+// Alignment enforcement is controlled by a configuration register:
+// SH_MEM_CONFIG.alignment_mode
+def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
+  "UnalignedAccessMode",
+  "true",
+  "Enable unaligned global, local and region loads and stores if the hardware"
+  " supports it"
+>;
+
 // Dummy feature used to disable assembler instructions.
 def FeatureDisable : SubtargetFeature<"",
   "FeatureDisable","true",
@@ -679,7 +680,8 @@
   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureCIInsts,
   FeatureMovrel, FeatureTrigReducedRange, FeatureGFX7GFX8GFX9Insts,
   FeatureSMemTimeInst, FeatureMadMacF32Insts,
-  FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC]
+  FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC,
+  FeatureUnalignedBufferAccess]
 >;
 
 def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
@@ -692,7 +694,8 @@
   FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
   FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
   FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
-  FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32
+  FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32,
+  FeatureUnalignedBufferAccess
  ]
 >;
 
@@ -709,7 +712,8 @@
   FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
   FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
   FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts,
-  FeatureFastDenormalF32, FeatureUnalignedDSAccess
+  FeatureFastDenormalF32, FeatureUnalignedBufferAccess,
+  FeatureUnalignedDSAccess
  ]
 >;
 
@@ -728,7 +732,7 @@
   FeatureVOP3Literal, FeatureDPP8,
   FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
   FeatureDoesNotSupportSRAMECC, FeatureGFX10A16,
   FeatureFastDenormalF32, FeatureG16,
-  FeatureUnalignedDSAccess
+  FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
  ]
 >;
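Note: the net effect of the AMDGPU.td changes above is to separate what the hardware can do from what the compiler is allowed to assume. unaligned-buffer-access and unaligned-ds-access now describe a hardware capability (and are attached to the generations that have it), while unaligned-access-mode is the opt-in switch modeling SH_MEM_CONFIG.alignment_mode. The following is a minimal standalone C++ sketch of the resulting predicate logic; it is illustrative only and not part of the patch, though the member and accessor names mirror the GCNSubtarget changes below.

#include <cstdio>

// Illustrative stand-in for the three GCNSubtarget feature bits.
struct SubtargetModel {
  bool UnalignedBufferAccess; // hardware: unaligned global/buffer ops work
  bool UnalignedDSAccess;     // hardware: unaligned LDS/region ops work
  bool UnalignedAccessMode;   // SH_MEM_CONFIG.alignment_mode is enabled

  // An unaligned access is only usable when the hardware supports it and
  // the alignment mode is actually switched on.
  bool hasUnalignedBufferAccessEnabled() const {
    return UnalignedBufferAccess && UnalignedAccessMode;
  }
  bool hasUnalignedDSAccessEnabled() const {
    return UnalignedDSAccess && UnalignedAccessMode;
  }
};

int main() {
  // A gfx900-like subtarget: both capability bits set (see the FeatureGFX9
  // list above), but the mode bit is off by default.
  SubtargetModel ST{true, true, false};
  std::printf("%d %d\n", ST.hasUnalignedBufferAccessEnabled(),
              ST.hasUnalignedDSAccessEnabled()); // prints: 0 0
  ST.UnalignedAccessMode = true; // i.e. -mattr=+unaligned-access-mode
  std::printf("%d %d\n", ST.hasUnalignedBufferAccessEnabled(),
              ST.hasUnalignedDSAccessEnabled()); // prints: 1 1
  return 0;
}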
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1051,9 +1051,9 @@
     return false;
   };
 
-  unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
-  unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
-  unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
+  unsigned GlobalAlign32 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 32;
+  unsigned GlobalAlign16 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 16;
+  unsigned GlobalAlign8 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 8;
 
   // TODO: Refine based on subtargets which support unaligned access or 128-bit
   // LDS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -314,7 +314,6 @@
   bool AutoWaitcntBeforeBarrier;
   bool CodeObjectV3;
   bool UnalignedScratchAccess;
-  bool UnalignedBufferAccess;
   bool UnalignedAccessMode;
   bool HasApertureRegs;
   bool EnableXNACK;
@@ -395,6 +394,7 @@
   bool HasMFMAInlineLiteralBug;
   bool HasVertexCache;
   short TexVTXClauseSize;
+  bool UnalignedBufferAccess;
   bool UnalignedDSAccess;
   bool ScalarizeGlobal;
 
@@ -697,6 +697,18 @@
     return UnalignedBufferAccess;
   }
 
+  bool hasUnalignedBufferAccessEnabled() const {
+    return UnalignedBufferAccess && UnalignedAccessMode;
+  }
+
+  bool hasUnalignedDSAccess() const {
+    return UnalignedDSAccess;
+  }
+
+  bool hasUnalignedDSAccessEnabled() const {
+    return UnalignedDSAccess && UnalignedAccessMode;
+  }
+
   bool hasUnalignedScratchAccess() const {
     return UnalignedScratchAccess;
   }
@@ -705,10 +717,6 @@
     return UnalignedAccessMode;
   }
 
-  bool hasUnalignedDSAccess() const {
-    return UnalignedDSAccess;
-  }
-
   bool hasApertureRegs() const {
     return HasApertureRegs;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -81,7 +81,7 @@
   SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
 
   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
-    FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
+    FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";
 
   FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
 
@@ -186,7 +186,6 @@
     AutoWaitcntBeforeBarrier(false),
     CodeObjectV3(false),
     UnalignedScratchAccess(false),
-    UnalignedBufferAccess(false),
     UnalignedAccessMode(false),
 
     HasApertureRegs(false),
@@ -258,6 +257,7 @@
     HasUnpackedD16VMem(false),
     LDSMisalignedBug(false),
     HasMFMAInlineLiteralBug(false),
+    UnalignedBufferAccess(false),
     UnalignedDSAccess(false),
 
     ScalarizeGlobal(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -88,7 +88,6 @@
       AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
       AMDGPU::FeatureFlatForGlobal,
       AMDGPU::FeaturePromoteAlloca,
-      AMDGPU::FeatureUnalignedBufferAccess,
       AMDGPU::FeatureUnalignedScratchAccess,
       AMDGPU::FeatureUnalignedAccessMode,
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1398,8 +1398,7 @@
       AddrSpace == AMDGPUAS::REGION_ADDRESS) {
     // Check if alignment requirements for ds_read/write instructions are
     // disabled.
-    if (Subtarget->hasUnalignedDSAccess() &&
-        Subtarget->hasUnalignedAccessMode()) {
+    if (Subtarget->hasUnalignedDSAccessEnabled()) {
       if (IsFast)
         *IsFast = true;
       return true;
@@ -1450,7 +1449,7 @@
     return AlignedBy4;
   }
 
-  if (Subtarget->hasUnalignedBufferAccess() &&
+  if (Subtarget->hasUnalignedBufferAccessEnabled() &&
       !(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
         AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
     // If we have an uniform constant load, it still requires using a slow
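Note: in the AMDGPULegalizerInfo.cpp hunk above, GlobalAlign32/16/8 act as minimum-alignment requirements, with 0 meaning "no extra restriction". Below is a hedged sketch of how those knobs compose, assuming a simplified size-to-knob mapping; the real code feeds these values into GlobalISel legality rules rather than a helper function, so minGlobalAlignBits is hypothetical.

// Hypothetical helper: minimum alignment, in bits, demanded of a global
// access of SizeBits. `Enabled` stands for
// ST.hasUnalignedBufferAccessEnabled(); returning 0 means the access has
// no extra alignment requirement.
unsigned minGlobalAlignBits(bool Enabled, unsigned SizeBits) {
  if (Enabled)
    return 0;    // unaligned mode on: the access is legal as-is
  if (SizeBits >= 32)
    return 32;   // GlobalAlign32
  if (SizeBits == 16)
    return 16;   // GlobalAlign16
  return 8;      // GlobalAlign8
}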
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
 
 ; FIXME:
 ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
--- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
+; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
 ; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
-; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
-; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
+; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
 ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX900 %s
 
 define <2 x half> @chain_hi_to_lo_private() {
 ; GCN-LABEL: chain_hi_to_lo_private:
diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
--- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
+++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
 ; Should not merge this to a dword load
 define i32 @global_load_2xi16_align2(i16 addrspace(1)* %p) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
--- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-access-mode -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
 
 ; SI-LABEL: {{^}}local_unaligned_load_store_i16:
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir
--- a/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir
@@ -1,5 +1,5 @@
 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s
-# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
+# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
 
 # FIXME: This overrides attributes that already are present. It should probably
 # only touch functions without an existing attribute.
@@ -10,8 +10,8 @@
 # MCPU: attributes #0 = { "target-cpu"="fiji" }
 # MCPU: attributes #1 = { "target-cpu"="hawaii" }
 
-# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-buffer-access" }
-# MATTR: attributes #1 = { "target-features"="+unaligned-buffer-access" }
+# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-access-mode" }
+# MATTR: attributes #1 = { "target-features"="+unaligned-access-mode" }
 
 --- |
   define amdgpu_kernel void @with_cpu_attr() #0 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir b/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir
--- a/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir
@@ -1,10 +1,10 @@
 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s
-# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
+# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
 
 # The command line arguments for -mcpu and -mattr should manifest themselves by adding the corresponding attributes to the stub IR function.
 
 # MCPU: attributes #0 = { "target-cpu"="hawaii" }
-# MATTR: attributes #0 = { "target-features"="+unaligned-buffer-access" }
+# MATTR: attributes #0 = { "target-features"="+unaligned-access-mode" }
 
 ---
 name: no_ir
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
@@ -1,7 +1,7 @@
-; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
-; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
-; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
-; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
+; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
+; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
+; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
+; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
 
 target triple = "amdgcn--"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=hawaii -load-store-vectorizer -S -o - %s | FileCheck %s
 ; Copy of test/CodeGen/AMDGPU/merge-stores.ll with some additions
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"