Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -3072,41 +3072,8 @@ return true; } -/// Return true if all users of V are within some (any) function, looking through -/// ConstantExprs. In other words, are there any global constant users? -static bool allUsersAreInFunctions(const Value *V) { - SmallVector Worklist; - for (auto *U : V->users()) - Worklist.push_back(U); - while (!Worklist.empty()) { - auto *U = Worklist.pop_back_val(); - if (isa(U)) { - for (auto *UU : U->users()) - Worklist.push_back(UU); - continue; - } - - if (!isa(U)) - return false; - } - return true; -} - -// Return true if T is an integer, float or an array/vector of either. -static bool isSimpleType(Type *T) { - if (T->isIntegerTy() || T->isFloatingPointTy()) - return true; - Type *SubT = nullptr; - if (T->isArrayTy()) - SubT = T->getArrayElementType(); - else if (T->isVectorTy()) - SubT = T->getVectorElementType(); - else - return false; - return SubT->isIntegerTy() || SubT->isFloatingPointTy(); -} - -static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, +static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, + const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl) { // If we're creating a pool entry for a constant global with unnamed address, // and the global is small enough, we can emit it inline into the constant pool @@ -3133,11 +3100,11 @@ !GVar->hasLocalLinkage()) return SDValue(); - // Ensure that we don't try and inline any type that contains pointers. If - // we inline a value that contains relocations, we move the relocations from - // .data to .text which is not ideal. + // If we inline a value that contains relocations, we move the relocations + // from .data to .text. This is not allowed in position-independent code. auto *Init = GVar->getInitializer(); - if (!isSimpleType(Init->getType())) + if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) && + Init->needsRelocation()) return SDValue(); // The constant islands pass can only really deal with alignment requests @@ -3169,12 +3136,14 @@ ConstpoolPromotionMaxTotal) return SDValue(); - // This is only valid if all users are in a single function OR it has users - // in multiple functions but it no larger than a pointer. We also check if - // GVar has constant (non-ConstantExpr) users. If so, it essentially has its - // address taken. - if (!allUsersAreInFunction(GVar, &F) && - !(Size <= 4 && allUsersAreInFunctions(GVar))) + // This is only valid if all users are in a single function; we can't clone + // the constant in general. The LLVM IR unnamed_addr allows merging + // constants, but not cloning them. + // + // We could potentially allow cloning if we could prove all uses of the + // constant in the current function don't care about the address, like + // printf format strings. But that isn't implemented for now. + if (!allUsersAreInFunction(GVar, &F)) return SDValue(); // We're going to inline this global. Pad it out if needed. @@ -3230,7 +3199,7 @@ // promoteToConstantPool only if not generating XO text section if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly()) - if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl)) + if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl)) return V; if (isPositionIndependent()) { Index: llvm/trunk/test/CodeGen/ARM/constantpool-promote-dbg.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/constantpool-promote-dbg.ll +++ llvm/trunk/test/CodeGen/ARM/constantpool-promote-dbg.ll @@ -6,14 +6,14 @@ @.str = private unnamed_addr constant [4 x i8] c"abc\00", align 1 ; CHECK-LABEL: fn1 -; CHECK: .str: +; CHECK: .long .L.str define arm_aapcscc i8* @fn1() local_unnamed_addr #0 !dbg !8 { entry: ret i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), !dbg !14 } ; CHECK-LABEL: fn2 -; CHECK-NOT: .str: +; CHECK: .long .L.str define arm_aapcscc i8* @fn2() local_unnamed_addr #0 !dbg !15 { entry: ret i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 1), !dbg !16 Index: llvm/trunk/test/CodeGen/ARM/constantpool-promote.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/constantpool-promote.ll +++ llvm/trunk/test/CodeGen/ARM/constantpool-promote.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=static -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM -; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=pic -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM +; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=static -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM,CHECK-STATIC +; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=pic -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM,CHECK-PIC ; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=ropi -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM ; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=rwpi -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM ; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=static -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB @@ -16,11 +16,13 @@ @.str2 = private unnamed_addr constant [27 x i8] c"this string is just right!\00", align 1 @.str3 = private unnamed_addr constant [26 x i8] c"this string is used twice\00", align 1 @.str4 = private unnamed_addr constant [29 x i8] c"same string in two functions\00", align 1 +@.str5 = private unnamed_addr constant [2 x i8] c"s\00", align 1 @.arr1 = private unnamed_addr constant [2 x i16] [i16 3, i16 4], align 2 @.arr2 = private unnamed_addr constant [2 x i16] [i16 7, i16 8], align 2 @.arr3 = private unnamed_addr constant [2 x i16*] [i16* null, i16* null], align 4 @.ptr = private unnamed_addr constant [2 x i16*] [i16* getelementptr inbounds ([2 x i16], [2 x i16]* @.arr2, i32 0, i32 0), i16* null], align 2 @.arr4 = private unnamed_addr constant [2 x i16] [i16 3, i16 4], align 16 +@.arr5 = private unnamed_addr constant [2 x i16] [i16 3, i16 4], align 2 @.zerosize = private unnamed_addr constant [0 x i16] zeroinitializer, align 4 @implicit_alignment_vector = private unnamed_addr constant <4 x i32> @@ -76,20 +78,14 @@ } ; CHECK-LABEL: @test6a -; CHECK: adr r0, [[x:.*]] -; CHECK: [[x]]: -; CHECK: .short 3 -; CHECK: .short 4 +; CHECK: L.arr1 define void @test6a() #0 { tail call void @c(i16* getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0)) #2 ret void } ; CHECK-LABEL: @test6b -; CHECK: adr r0, [[x:.*]] -; CHECK: [[x]]: -; CHECK: .short 3 -; CHECK: .short 4 +; CHECK: L.arr1 define void @test6b() #0 { tail call void @c(i16* getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0)) #2 ret void @@ -103,9 +99,9 @@ ret void } -; This shouldn't be promoted, because the array contains pointers. +; This can be promoted; it contains pointers, but they don't need relocations. ; CHECK-LABEL: @test8 -; CHECK-NOT: .zero +; CHECK: .zero ; CHECK: .fnend define void @test8() #0 { %a = load i16*, i16** getelementptr inbounds ([2 x i16*], [2 x i16*]* @.arr3, i32 0, i32 0) @@ -113,6 +109,17 @@ ret void } +; This can't be promoted in PIC mode because it contains pointers to other globals. +; CHECK-LABEL: @test8a +; CHECK-STATIC: .long .L.arr2 +; CHECK-PIC: .long .L.ptr +; CHECK: .fnend +define void @test8a() #0 { + %a = load i16*, i16** getelementptr inbounds ([2 x i16*], [2 x i16*]* @.ptr, i32 0, i32 0) + tail call void @c(i16* %a) #2 + ret void +} + @fn1.a = private unnamed_addr constant [4 x i16] [i16 4, i16 0, i16 0, i16 0], align 2 @fn2.a = private unnamed_addr constant [8 x i8] [i8 4, i8 0, i8 0, i8 0, i8 23, i8 0, i8 6, i8 0], align 1 @@ -157,7 +164,7 @@ ; CHECK-V7: [[x]]: ; CHECK-V7: .asciz "s\000\000" define void @test10(i8* %a) local_unnamed_addr #0 { - call void @llvm.memmove.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 getelementptr inbounds ([2 x i8], [2 x i8]* @.str5, i32 0, i32 0), i32 1, i1 false) ret void } @@ -175,7 +182,7 @@ ; CHECK-V7ARM: .short 3 ; CHECK-V7ARM: .short 4 define void @test11(i16* %a) local_unnamed_addr #0 { - call void @llvm.memmove.p0i16.p0i16.i32(i16* align 2 %a, i16* align 2 getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0), i32 2, i1 false) + call void @llvm.memmove.p0i16.p0i16.i32(i16* align 2 %a, i16* align 2 getelementptr inbounds ([2 x i16], [2 x i16]* @.arr5, i32 0, i32 0), i32 2, i1 false) ret void }