Skip the pointer-argument alignment step in CodeGenPrepare when OptSize is set.

The CodeGenPrepare.cpp hunk puts a `!OptSize &&` guard in front of the
TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign) query, so the loop over
CI->args() that follows is not entered when OptSize is true.

Test changes in this patch:
ARM/memcpy-no-inline.ll: @bar (attribute group #0, align 1 operands) now
expects a reference to __aeabi_memcpy; the new @bar2 uses align 4 operands
together with the new align 4 constant @.str.2 and expects none.
ARM/memfunc.ll: new minsize function @fminsize passes three align 1
[9 x i8] allocas to memmove, memcpy and memset and checks the stack offsets
used for them plus the library call emitted for each target prefix.

NOTE(review): indentation and in-line spacing of the hunk bodies below are
assumed to be the conventional 2-space layout; confirm against the target
files, or apply with whitespace-insensitive context matching.

Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2127,7 +2127,7 @@
   // idea
   unsigned MinSize;
   Align PrefAlign;
-  if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+  if (!OptSize && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
     for (auto &Arg : CI->args()) {
       // We want to align both objects whose address is used directly and
       // objects whose address is used in casts and GEPs, though it only makes
Index: llvm/test/CodeGen/ARM/memcpy-no-inline.ll
===================================================================
--- llvm/test/CodeGen/ARM/memcpy-no-inline.ll
+++ llvm/test/CodeGen/ARM/memcpy-no-inline.ll
@@ -4,6 +4,7 @@
 
 @.str = private unnamed_addr constant [31 x i8] c"012345678901234567890123456789\00", align 1
 @.str.1 = private unnamed_addr constant [21 x i8] c"01234567890123456789\00", align 1
+@.str.2 = private unnamed_addr constant [21 x i8] c"01234567890123456789\00", align 4
 
 @myglobal = common global %struct.mystruct zeroinitializer, align 1
 
@@ -21,13 +22,23 @@
 define void @bar() #0 {
 entry:
 ; CHECK-LABEL: bar:
-; CHECK-NOT: __aeabi_memcpy
+; CHECK: __aeabi_memcpy
   %mystring = alloca [31 x i8], align 1
   %0 = getelementptr inbounds [31 x i8], [31 x i8]* %mystring, i32 0, i32 0
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i32 0, i32 0), i32 21, i1 false)
   ret void
 }
 
+define void @bar2() #0 {
+entry:
+; CHECK-LABEL: bar2:
+; CHECK-NOT: __aeabi_memcpy
+  %mystring = alloca [32 x i8], align 4
+  %0 = getelementptr inbounds [32 x i8], [32 x i8]* %mystring, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 getelementptr inbounds ([21 x i8], [21 x i8]* @.str.2, i32 0, i32 0), i32 21, i1 false)
+  ret void
+}
+
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1
 
 attributes #0 = { minsize noinline nounwind optsize }
Index: llvm/test/CodeGen/ARM/memfunc.ll
===================================================================
--- llvm/test/CodeGen/ARM/memfunc.ll
+++ llvm/test/CodeGen/ARM/memfunc.ll
@@ -377,6 +377,45 @@
   ret void
 }
 
+; Check that alloca arguments are not aligned when the function is minsize
+define void @fminsize(i8* %dest, i32 %n) "frame-pointer"="all" minsize {
+entry:
+  ; CHECK-LABEL: fminsize
+
+  ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r(7|11), #17}}
+  ; CHECK-IOS: bl _memmove
+  ; CHECK-DARWIN: bl _memmove
+  ; CHECK-EABI: bl __aeabi_memmove
+  ; CHECK-GNUEABI: bl memmove
+  %arr0 = alloca [9 x i8], align 1
+  %0 = bitcast [9 x i8]* %arr0 to i8*
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false)
+
+  ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #26}}
+  ; CHECK-IOS: bl _memcpy
+  ; CHECK-DARWIN: bl _memcpy
+  ; CHECK-EABI: bl __aeabi_memcpy
+  ; CHECK-GNUEABI: bl memcpy
+  %arr1 = alloca [9 x i8], align 1
+  %1 = bitcast [9 x i8]* %arr1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false)
+
+  ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #35}}
+  ; CHECK-IOS: mov r1, #1
+  ; CHECK-IOS: bl _memset
+  ; CHECK-DARWIN: movs r1, #1
+  ; CHECK-DARWIN: bl _memset
+  ; CHECK-EABI: mov r2, #1
+  ; CHECK-EABI: bl __aeabi_memset
+  ; CHECK-GNUEABI: mov r1, #1
+  ; CHECK-GNUEABI: bl memset
+  %arr2 = alloca [9 x i8], align 1
+  %2 = bitcast [9 x i8]* %arr2 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false)
+
+  ret void
+}
+
 ; Check that global variables are aligned if they are large enough, but only if
 ; they are defined in this object and don't have an explicit section.
 @arr1 = global [7 x i8] c"\01\02\03\04\05\06\07", align 1