diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1909,10 +1909,10 @@
   /// the object whose address is being passed. If so then MinSize is set to the
   /// minimum size the object must be to be aligned and PrefAlign is set to the
   /// preferred alignment.
-  virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
-                                      Align & /*PrefAlign*/) const {
-    return false;
-  }
+  virtual bool shouldUpdatePointerArgAlignment(const CallInst *CI,
+                                               const Value *Arg,
+                                               unsigned &MinSize,
+                                               Align &PrefAlign) const;
 
   //===--------------------------------------------------------------------===//
   /// \name Helpers for TargetTransformInfo implementations
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2221,16 +2221,16 @@
 
   // Align the pointer arguments to this call if the target thinks it's a good
   // idea (generally only useful for memcpy/memmove/memset).
-  for (auto &Arg : CI->args()) {
-    // We want to align both objects whose address is used directly and
-    // objects whose address is used in casts and GEPs, though it only makes
-    // sense for GEPs if the offset is a multiple of the desired alignment and
-    // if size - offset meets the size threshold.
-    if (!Arg->getType()->isPointerTy())
-      continue;
+  for (auto &Arg : CI->args()) {
+    // We want to align both objects whose address is used directly and
+    // objects whose address is used in casts and GEPs, though it only makes
+    // sense for GEPs if the offset is a multiple of the desired alignment and
+    // if size - offset meets the size threshold.
+    if (!Arg->getType()->isPointerTy())
+      continue;
     unsigned MinSize;
     Align PrefAlign;
-    if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+    if (TLI->shouldUpdatePointerArgAlignment(CI, Arg, MinSize, PrefAlign)) {
       APInt Offset(DL->getIndexSizeInBits(
                        cast<PointerType>(Arg->getType())->getAddressSpace()),
                    0);
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -43,6 +43,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Casting.h"
@@ -946,6 +947,39 @@
   return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
 }
 
+bool TargetLoweringBase::shouldUpdatePointerArgAlignment(
+    const CallInst *CI, const Value *Arg, unsigned &MinSize,
+    Align &PrefAlign) const {
+  // For now, we only adjust alignment for memcpy/memmove/memset calls.
+  auto *MemCI = dyn_cast<MemIntrinsic>(CI);
+  if (!MemCI)
+    return false;
+  // When building with -Oz, we only increase the alignment if the object is
+  // at least 8 bytes in size to avoid increased stack/global padding.
+  MinSize = CI->getFunction()->hasMinSize() ? 8 : 1;
+  auto AddrSpace = MemCI->getDestAddressSpace();
+  const DataLayout &DL = CI->getModule()->getDataLayout();
+  auto PointerSize = DL.getPointerSize(AddrSpace);
+
+  // We assume that loads/stores of values aligned to pointer size are fast.
+  PrefAlign = Align(PointerSize);
+  if (Arg->getPointerAlignment(DL) > PrefAlign)
+    return false; // Already aligned, no need to update it.
+
+  // XXX: we could determine the MachineMemOperand flags instead of assuming
+  // load+store (but it probably makes no difference for supported targets).
+  bool FastUnalignedAccess = false;
+  if (allowsMisalignedMemoryAccesses(
+          llvm::LLT::pointer(AddrSpace, PointerSize * 8), AddrSpace,
+          Arg->getPointerAlignment(DL),
+          MachineMemOperand::MOStore | MachineMemOperand::MOLoad,
+          &FastUnalignedAccess)) {
+    // If unaligned loads/stores are fast, there is no need to adjust alignment.
+    return !FastUnalignedAccess;
+  }
+  return true; // Unaligned accesses are not possible or slow.
+}
+
 void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
   // If the command-line option was specified, ignore this request.
   if (!JumpIsExpensiveOverride.getNumOccurrences())
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -572,8 +572,9 @@
   const TargetRegisterClass *
   getRegClassFor(MVT VT, bool isDivergent = false) const override;
 
-  bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
-                              Align &PrefAlign) const override;
+  bool shouldUpdatePointerArgAlignment(const CallInst *CI, const Value *Arg,
+                                       unsigned &MinSize,
+                                       Align &PrefAlign) const override;
 
   /// createFastISel - This method returns a target specific FastISel object,
   /// or null if the target does not support "fast" ISel.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1919,11 +1919,12 @@
 // memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
 // source/dest is aligned and the copy size is large enough. We therefore want
 // to align such objects passed to memory intrinsics.
-bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
-                                               Align &PrefAlign) const {
+bool ARMTargetLowering::shouldUpdatePointerArgAlignment(
+    const CallInst *CI, const Value *Arg, unsigned &MinSize,
+    Align &PrefAlign) const {
   if (!isa<MemIntrinsic>(CI))
     return false;
-  MinSize = 8;
+  MinSize = 8; // TODO: should this depend on -Oz?
   // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
   // cycle faster than 4-byte aligned LDM.
   PrefAlign =
diff --git a/llvm/test/CodeGen/BPF/rodata_5.ll b/llvm/test/CodeGen/BPF/rodata_5.ll
--- a/llvm/test/CodeGen/BPF/rodata_5.ll
+++ b/llvm/test/CodeGen/BPF/rodata_5.ll
@@ -35,8 +35,8 @@
 }
 ; CHECK-NOT: w{{[0-9]+}} = *(u16 *)
 ; CHECK-NOT: w{{[0-9]+}} = *(u8 *)
-; CHECK: *(u16 *)(r10 - 4) = w{{[0-9]+}}
-; CHECK: *(u8 *)(r10 - 2) = w{{[0-9]+}}
+; CHECK: *(u16 *)(r10 - 8) = w{{[0-9]+}}
+; CHECK: *(u8 *)(r10 - 6) = w{{[0-9]+}}
 
 ; Function Attrs: argmemonly nounwind willreturn
 declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
diff --git a/llvm/test/CodeGen/RISCV/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/memcpy-inline.ll
--- a/llvm/test/CodeGen/RISCV/memcpy-inline.ll
+++ b/llvm/test/CodeGen/RISCV/memcpy-inline.ll
@@ -295,50 +295,35 @@
 }
 
 define void @t6() nounwind {
-; RV32ALIGNED-LABEL: t6:
-; RV32ALIGNED:       # %bb.0: # %entry
-; RV32ALIGNED-NEXT:    addi sp, sp, -16
-; RV32ALIGNED-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ALIGNED-NEXT:    lui a0, %hi(spool.splbuf)
-; RV32ALIGNED-NEXT:    addi a0, a0, %lo(spool.splbuf)
-; RV32ALIGNED-NEXT:    lui a1, %hi(.L.str6)
-; RV32ALIGNED-NEXT:    addi a1, a1, %lo(.L.str6)
-; RV32ALIGNED-NEXT:    li a2, 14
-; RV32ALIGNED-NEXT:    call memcpy@plt
-; RV32ALIGNED-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32ALIGNED-NEXT:    addi sp, sp, 16
-; RV32ALIGNED-NEXT:    ret
+; RV32-LABEL: t6:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lui a0, %hi(spool.splbuf)
+; RV32-NEXT:    li a1, 88
+; RV32-NEXT:    sh a1, %lo(spool.splbuf+12)(a0)
+; RV32-NEXT:    lui a1, 361862
+; RV32-NEXT:    addi a1, a1, -1960
+; RV32-NEXT:    sw a1, %lo(spool.splbuf+8)(a0)
+; RV32-NEXT:    lui a1, 362199
+; RV32-NEXT:    addi a1, a1, 559
+; RV32-NEXT:    sw a1, %lo(spool.splbuf+4)(a0)
+; RV32-NEXT:    lui a1, 460503
+; RV32-NEXT:    addi a1, a1, 1071
+; RV32-NEXT:    sw a1, %lo(spool.splbuf)(a0)
+; RV32-NEXT:    ret
 ;
 ; RV64ALIGNED-LABEL: t6:
 ; RV64ALIGNED:       # %bb.0: # %entry
-; RV64ALIGNED-NEXT:    addi sp, sp, -16
-; RV64ALIGNED-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ALIGNED-NEXT:    lui a0, %hi(spool.splbuf)
-; RV64ALIGNED-NEXT:    addi a0, a0, %lo(spool.splbuf)
-; RV64ALIGNED-NEXT:    lui a1, %hi(.L.str6)
-; RV64ALIGNED-NEXT:    addi a1, a1, %lo(.L.str6)
-; RV64ALIGNED-NEXT:    li a2, 14
-; RV64ALIGNED-NEXT:    call memcpy@plt
-; RV64ALIGNED-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64ALIGNED-NEXT:    addi sp, sp, 16
+; RV64ALIGNED-NEXT:    li a1, 88
+; RV64ALIGNED-NEXT:    sh a1, %lo(spool.splbuf+12)(a0)
+; RV64ALIGNED-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV64ALIGNED-NEXT:    ld a1, %lo(.LCPI6_0)(a1)
+; RV64ALIGNED-NEXT:    lui a2, 361862
+; RV64ALIGNED-NEXT:    addiw a2, a2, -1960
+; RV64ALIGNED-NEXT:    sw a2, %lo(spool.splbuf+8)(a0)
+; RV64ALIGNED-NEXT:    sd a1, %lo(spool.splbuf)(a0)
 ; RV64ALIGNED-NEXT:    ret
 ;
-; RV32UNALIGNED-LABEL: t6:
-; RV32UNALIGNED:       # %bb.0: # %entry
-; RV32UNALIGNED-NEXT:    lui a0, %hi(spool.splbuf)
-; RV32UNALIGNED-NEXT:    li a1, 88
-; RV32UNALIGNED-NEXT:    sh a1, %lo(spool.splbuf+12)(a0)
-; RV32UNALIGNED-NEXT:    lui a1, 361862
-; RV32UNALIGNED-NEXT:    addi a1, a1, -1960
-; RV32UNALIGNED-NEXT:    sw a1, %lo(spool.splbuf+8)(a0)
-; RV32UNALIGNED-NEXT:    lui a1, 362199
-; RV32UNALIGNED-NEXT:    addi a1, a1, 559
-; RV32UNALIGNED-NEXT:    sw a1, %lo(spool.splbuf+4)(a0)
-; RV32UNALIGNED-NEXT:    lui a1, 460503
-; RV32UNALIGNED-NEXT:    addi a1, a1, 1071
-; RV32UNALIGNED-NEXT:    sw a1, %lo(spool.splbuf)(a0)
-; RV32UNALIGNED-NEXT:    ret
-;
 ; RV64UNALIGNED-LABEL: t6:
 ; RV64UNALIGNED:       # %bb.0: # %entry
 ; RV64UNALIGNED-NEXT:    lui a0, %hi(.L.str6)
diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll
--- a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll
+++ b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll
@@ -160,7 +160,7 @@
 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
 ; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
 ; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 8
 ; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
 ; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
@@ -178,7 +178,7 @@
 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
 ; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
 ; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 8
 ; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
 ; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
 ; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
@@ -196,7 +196,7 @@
 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
 ; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
 ; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 8
 ; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
 ; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
 ; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
@@ -18,7 +18,7 @@
   ; ALL:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
   ; ALL:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.f
   ; ALL:   G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.coerce.dive2)
-  ; ALL:   G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
+  ; ALL:   G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 8), (load (s8) from %ir.1, align 8)
   ; ALL:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.coerce.dive13)
   ; ALL:   $xmm0 = COPY [[LOAD]](s32)
   ; ALL:   RET 0, implicit $xmm0
@@ -108,7 +108,7 @@
   ; ALL:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
   ; ALL:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
   ; ALL:   G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.coerce.dive2)
-  ; ALL:   G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
+  ; ALL:   G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 8), (load (s8) from %ir.1, align 8)
   ; ALL:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.coerce.dive13)
   ; ALL:   $eax = COPY [[LOAD]](s32)
   ; ALL:   RET 0, implicit $eax
@@ -134,7 +134,7 @@
   ; ALL:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
   ; ALL:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
   ; ALL:   G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.0, align 4)
-  ; ALL:   G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.1, align 4), (load (s8) from %ir.2, align 4)
+  ; ALL:   G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.1, align 8), (load (s8) from %ir.2, align 8)
   ; ALL:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.3, align 4)
   ; ALL:   $rax = COPY [[LOAD]](s64)
   ; ALL:   RET 0, implicit $rax
@@ -166,9 +166,9 @@
   ; ALL:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
   ; ALL:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64)
   ; ALL:   G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.1)
-  ; ALL:   G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 4), (load (s8) from %ir.3, align 4)
-  ; ALL:   G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 4), (load (s8) from %ir.5, align 4)
-  ; ALL:   G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 4)
+  ; ALL:   G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 8), (load (s8) from %ir.3, align 8)
+  ; ALL:   G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 8), (load (s8) from %ir.5, align 8)
+  ; ALL:   G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 8)
   ; ALL:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load (s64) from %ir.tmp)
   ; ALL:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64)
   ; ALL:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8)
@@ -210,7 +210,7 @@
   ; ALL:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
   ; ALL:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
   ; ALL:   G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2, align 4)
-  ; ALL:   G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 4), (load (s8) from %ir.4, align 4)
+  ; ALL:   G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 8), (load (s8) from %ir.4, align 8)
   ; ALL:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5, align 4)
   ; ALL:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
   ; ALL:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4)
diff --git a/llvm/test/CodeGen/X86/load-local-v4i5.ll b/llvm/test/CodeGen/X86/load-local-v4i5.ll
--- a/llvm/test/CodeGen/X86/load-local-v4i5.ll
+++ b/llvm/test/CodeGen/X86/load-local-v4i5.ll
@@ -7,10 +7,10 @@
 ; CHECK-LABEL: _start:
 ; CHECK:       # %bb.0: # %Entry
 ; CHECK-NEXT:    movl __unnamed_1(%rip), %eax
-; CHECK-NEXT:    movl %eax, -12(%rsp)
-; CHECK-NEXT:    movzbl -9(%rsp), %ecx
-; CHECK-NEXT:    movzbl -10(%rsp), %edx
-; CHECK-NEXT:    movzbl -11(%rsp), %esi
+; CHECK-NEXT:    movl %eax, -16(%rsp)
+; CHECK-NEXT:    movzbl -13(%rsp), %ecx
+; CHECK-NEXT:    movzbl -14(%rsp), %edx
+; CHECK-NEXT:    movzbl -15(%rsp), %esi
 ; CHECK-NEXT:    movzbl %cl, %edi
 ; CHECK-NEXT:    shrb %cl
 ; CHECK-NEXT:    movb %cl, -2(%rsp)
diff --git a/llvm/test/Transforms/CodeGenPrepare/RISCV/adjust-memintrin-alignment.ll b/llvm/test/Transforms/CodeGenPrepare/RISCV/adjust-memintrin-alignment.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/RISCV/adjust-memintrin-alignment.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --force-update
+; RUN: opt -mtriple=riscv64 -data-layout="e-m:e-p:32:32" -S -codegenprepare < %s \
+; RUN:   | FileCheck %s '-D#NEW_ALIGNMENT=4'
+; RUN: opt -mtriple=riscv32 -data-layout="e-m:e-p:64:64" -S -codegenprepare < %s \
+; RUN:   | FileCheck %s '-D#NEW_ALIGNMENT=8'
+
+@str = private unnamed_addr constant [45 x i8] c"THIS IS A LONG STRING THAT SHOULD BE ALIGNED\00", align 1
+
+
+declare void @use(ptr %arg)
+
+
+; CHECK: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [45 x i8] c"THIS IS A LONG STRING THAT SHOULD BE ALIGNED\00", align [[#NEW_ALIGNMENT]]
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DST:%.*]] = alloca [45 x i8], align [[#NEW_ALIGNMENT]]
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i32(ptr align [[#NEW_ALIGNMENT]] [[DST]], ptr align [[#NEW_ALIGNMENT]] dereferenceable(31) @str, i32 31, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  %dst = alloca [45 x i8], align 1
+  tail call void @llvm.memcpy.p0.p0.i32(ptr align 1 %dst, ptr align 1 dereferenceable(31) @str, i32 31, i1 false)
+  ; Ensure the alloca is used to avoid it being optimized out.
+  ; call void @use(ptr %dst)
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1)
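
Note (illustration, not part of the patch): targets that do not override the new hook now get the generic TargetLoweringBase behaviour above, which derives PrefAlign from the DataLayout pointer size of the destination address space; that is what the BPF, RISC-V, WebAssembly and X86 test updates reflect. A backend that wants its own policy only has to override shouldUpdatePointerArgAlignment(). The sketch below mirrors the ARM override; "MyTargetLowering" and the 16-byte numbers are hypothetical placeholders, not anything introduced by this patch.

// Illustrative sketch only -- not part of this patch. "MyTargetLowering" is a
// hypothetical TargetLowering subclass; the structure mirrors the ARM override.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"

bool MyTargetLowering::shouldUpdatePointerArgAlignment(
    const CallInst *CI, const Value *Arg, unsigned &MinSize,
    Align &PrefAlign) const {
  // Like the default implementation, only consider pointers that are passed
  // to memory intrinsics (memcpy/memmove/memset).
  if (!isa<MemIntrinsic>(CI))
    return false;
  // Hypothetical policy: do not pad objects smaller than 16 bytes, and ask
  // for 16-byte alignment so wider load/store sequences can be used.
  MinSize = 16;
  PrefAlign = Align(16);
  // Only request an update when the argument is not already aligned enough.
  const DataLayout &DL = CI->getModule()->getDataLayout();
  return Arg->getPointerAlignment(DL) < PrefAlign;
}

CodeGenPrepare then raises the alignment of the underlying alloca or global only when the object is at least MinSize bytes past the pointer's offset and that offset is a multiple of PrefAlign, as the comment in the updated CodeGenPrepare loop above describes.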