Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp +++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp @@ -1288,13 +1288,25 @@ cast<PointerType>(Arg->getType())->getAddressSpace()), 0); Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset); uint64_t Offset2 = Offset.getLimitedValue(); + if ((Offset2 & (PrefAlign-1)) != 0) + continue; AllocaInst *AI; - if ((Offset2 & (PrefAlign-1)) == 0 && - (AI = dyn_cast<AllocaInst>(Val)) && + if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) AI->setAlignment(PrefAlign); - // TODO: Also align GlobalVariables + // Global variables can only be aligned if they are defined in this + // object (i.e. they are uniquely initialized in this object), and + // over-aligning global variables that have an explicit section is + // forbidden. + GlobalVariable *GV; + if ((GV = dyn_cast<GlobalVariable>(Val)) && + GV->hasUniqueInitializer() && + !GV->hasSection() && + GV->getAlignment() < PrefAlign && + TD->getTypeAllocSize( + GV->getType()->getElementType()) >= MinSize + Offset2) + GV->setAlignment(PrefAlign); } // If this is a memcpy (or similar) then we may be able to improve the // alignment Index: llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll +++ llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll @@ -30,7 +30,7 @@ define void @t1(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t1: -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] ; CHECK: adds r0, #15 ; CHECK: adds r1, #15 @@ -48,7 +48,7 @@ ; CHECK: str [[REG2]], [r0, #32] ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! 
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) ret void @@ -59,7 +59,7 @@ ; CHECK-LABEL: t3: ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! -; CHECK: vld1.8 {d{{[0-9]+}}}, [r1] +; CHECK: vldr d{{[0-9]+}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) ret void @@ -68,7 +68,7 @@ define void @t4(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t4: -; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1] +; CHECK: vld1.64 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1] ; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]! ; CHECK: strh [[REG5:r[0-9]+]], [r0] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false) @@ -97,11 +97,11 @@ define void @t6() nounwind { entry: ; CHECK-LABEL: t6: -; CHECK: vld1.8 {[[REG9:d[0-9]+]]}, [r0] +; CHECK: vldr [[REG9:d[0-9]+]], [r0] ; CHECK: vstr [[REG9]], [r1] ; CHECK: adds r1, #6 ; CHECK: adds r0, #6 -; CHECK: vld1.8 +; CHECK: vld1.16 ; CHECK: vst1.16 ; CHECK-T1-LABEL: t6: ; CHECK-T1: movs [[TREG5:r[0-9]]], Index: llvm/trunk/test/CodeGen/ARM/memfunc.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/memfunc.ll +++ llvm/trunk/test/CodeGen/ARM/memfunc.ll @@ -3,22 +3,19 @@ ; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK ; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK -@from = common global [500 x i32] zeroinitializer, 
align 4 -@to = common global [500 x i32] zeroinitializer, align 4 - -define void @f1() { +define void @f1(i8* %dest, i8* %src) { entry: ; CHECK-LABEL: f1 ; CHECK-IOS: memmove ; CHECK-DARWIN: memmove ; CHECK-EABI: __aeabi_memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) ; CHECK-IOS: memcpy ; CHECK-DARWIN: memcpy ; CHECK-EABI: __aeabi_memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) ; EABI memset swaps arguments ; CHECK-IOS: mov r1, #0 @@ -27,7 +24,7 @@ ; CHECK-DARWIN: memset ; CHECK-EABI: mov r2, #0 ; CHECK-EABI: __aeabi_memset - call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false) unreachable } @@ -281,6 +278,47 @@ unreachable } +; Check that global variables are aligned if they are large enough, but only if +; they are defined in this object and don't have an explicit section. 
+@arr1 = global [7 x i8] c"\01\02\03\04\05\06\07", align 1 +@arr2 = global [8 x i8] c"\01\02\03\04\05\06\07\08", align 1 +@arr3 = global [7 x i8] c"\01\02\03\04\05\06\07", section "foo,bar", align 1 +@arr4 = global [8 x i8] c"\01\02\03\04\05\06\07\08", section "foo,bar", align 1 +@arr5 = weak global [7 x i8] c"\01\02\03\04\05\06\07", align 1 +@arr6 = weak_odr global [7 x i8] c"\01\02\03\04\05\06\07", align 1 +@arr7 = external global [7 x i8], align 1 +define void @f9(i8* %dest, i32 %n) { +entry: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i32 1, i1 false) + + unreachable +} + +; CHECK: {{\.data|\.section.+data}} +; CHECK-NOT: .align +; CHECK: arr1: +; CHECK-IOS: .align 3 +; CHECK-DARWIN: .align 2 +; CHECK-EABI: .align 2 +; CHECK: arr2: +; CHECK: {{\.section.+foo,bar}} +; CHECK-NOT: .align +; CHECK: arr3: +; CHECK-NOT: .align +; CHECK: arr4: +; CHECK: {{\.data|\.section.+data}} +; CHECK-NOT: .align +; CHECK: arr5: +; CHECK-NOT: .align +; CHECK: arr6: +; CHECK-NOT: arr7: + declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* 
nocapture, i32, i32, i1) nounwind declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind