Index: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp +++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp @@ -357,6 +357,41 @@ return Changed; } +static bool isSafeSROAElementUse(Value *V); + +/// Return true if the specified GEP is a safe user of a derived +/// expression from a global that we want to SROA. +static bool isSafeSROAGEP(User *U) { + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we + // don't like < 3 operand CE's, and we don't like non-constant integer + // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some + // value of C. + if (U->getNumOperands() < 3 || !isa(U->getOperand(1)) || + !cast(U->getOperand(1))->isNullValue()) + return false; + + gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); + ++GEPI; // Skip over the pointer index. + + // For all other level we require that the indices are constant and inrange. + // In particular, consider: A[0][i]. We cannot know that the user isn't doing + // invalid things like allowing i to index an out-of-range subscript that + // accesses A[1]. This can also happen between different members of a struct + // in llvm IR. + for (; GEPI != E; ++GEPI) { + if (GEPI.isStruct()) + continue; + + ConstantInt *IdxVal = dyn_cast(GEPI.getOperand()); + if (!IdxVal || (GEPI.isBoundedSequential() && + IdxVal->getZExtValue() >= GEPI.getSequentialNumElements())) + return false; + } + + return llvm::all_of(U->users(), + [](User *UU) { return isSafeSROAElementUse(UU); }); +} + /// Return true if the specified instruction is a safe user of a derived /// expression from a global that we want to SROA. static bool isSafeSROAElementUse(Value *V) { @@ -374,84 +409,25 @@ if (StoreInst *SI = dyn_cast(I)) return SI->getOperand(0) != V; - // Otherwise, it must be a GEP. - GetElementPtrInst *GEPI = dyn_cast(I); - if (!GEPI) return false; - - if (GEPI->getNumOperands() < 3 || !isa(GEPI->getOperand(1)) || - !cast(GEPI->getOperand(1))->isNullValue()) - return false; - - for (User *U : GEPI->users()) - if (!isSafeSROAElementUse(U)) - return false; - return true; -} - -/// U is a direct user of the specified global value. Look at it and its uses -/// and decide whether it is safe to SROA this global. -static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { - // The user of the global must be a GEP Inst or a ConstantExpr GEP. - if (!isa(U) && - (!isa(U) || - cast(U)->getOpcode() != Instruction::GetElementPtr)) - return false; - - // Check to see if this ConstantExpr GEP is SRA'able. In particular, we - // don't like < 3 operand CE's, and we don't like non-constant integer - // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some - // value of C. - if (U->getNumOperands() < 3 || !isa(U->getOperand(1)) || - !cast(U->getOperand(1))->isNullValue() || - !isa(U->getOperand(2))) - return false; - - gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); - ++GEPI; // Skip over the pointer index. - - // If this is a use of an array allocation, do a bit more checking for sanity. - if (GEPI.isSequential()) { - ConstantInt *Idx = cast(U->getOperand(2)); - - // Check to make sure that index falls within the array. If not, - // something funny is going on, so we won't do the optimization. - // - if (GEPI.isBoundedSequential() && - Idx->getZExtValue() >= GEPI.getSequentialNumElements()) - return false; - - // We cannot scalar repl this level of the array unless any array - // sub-indices are in-range constants. In particular, consider: - // A[0][i]. We cannot know that the user isn't doing invalid things like - // allowing i to index an out-of-range subscript that accesses A[1]. - // - // Scalar replacing *just* the outer index of the array is probably not - // going to be a win anyway, so just give up. - for (++GEPI; // Skip array index. - GEPI != E; - ++GEPI) { - if (GEPI.isStruct()) - continue; - - ConstantInt *IdxVal = dyn_cast(GEPI.getOperand()); - if (!IdxVal || - (GEPI.isBoundedSequential() && - IdxVal->getZExtValue() >= GEPI.getSequentialNumElements())) - return false; - } - } - - return llvm::all_of(U->users(), - [](User *UU) { return isSafeSROAElementUse(UU); }); + // Otherwise, it must be a GEP. Check it and its users are safe to SRA. + return isa(I) && isSafeSROAGEP(I); } /// Look at all uses of the global and decide whether it is safe for us to /// perform this transformation. static bool GlobalUsersSafeToSRA(GlobalValue *GV) { - for (User *U : GV->users()) - if (!IsUserOfGlobalSafeForSRA(U, GV)) + for (User *U : GV->users()) { + // The user of the global must be a GEP Inst or a ConstantExpr GEP. + if (!isa(U) && + (!isa(U) || + cast(U)->getOpcode() != Instruction::GetElementPtr)) return false; + // Check the gep and it's users are safe to SRA + if (!isSafeSROAGEP(U)) + return false; + } + return true; } Index: llvm/trunk/test/Transforms/GlobalOpt/globalsra-multigep.ll =================================================================== --- llvm/trunk/test/Transforms/GlobalOpt/globalsra-multigep.ll +++ llvm/trunk/test/Transforms/GlobalOpt/globalsra-multigep.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -globalopt -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@g_data = internal unnamed_addr global <{ [8 x i16], [8 x i16] }> <{ [8 x i16] [i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16], [8 x i16] zeroinitializer }>, align 16 +; We cannot SRA here due to the second gep meaning the access to g_data may be to either element +; CHECK: @g_data = internal unnamed_addr constant <{ [8 x i16], [8 x i16] }> + +define i16 @test(i64 %a1) { +entry: + %g1 = getelementptr inbounds <{ [8 x i16], [8 x i16] }>, <{ [8 x i16], [8 x i16] }>* @g_data, i64 0, i32 0 + %arrayidx.i = getelementptr inbounds [8 x i16], [8 x i16]* %g1, i64 0, i64 %a1 + %r = load i16, i16* %arrayidx.i, align 2 + ret i16 %r +} Index: llvm/trunk/test/Transforms/GlobalOpt/globalsra-partial.ll =================================================================== --- llvm/trunk/test/Transforms/GlobalOpt/globalsra-partial.ll +++ llvm/trunk/test/Transforms/GlobalOpt/globalsra-partial.ll @@ -1,11 +1,12 @@ -; In this case, the global can only be broken up by one level. +; In this case, the global cannot be merged as i may be out of range ; RUN: opt < %s -globalopt -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @G = internal global { i32, [4 x float] } zeroinitializer ; <{ i32, [4 x float] }*> [#uses=3] -; CHECK-NOT: 12345 +; CHECK: @G = internal unnamed_addr global { i32, [4 x float] } +; CHECK: 12345 define void @onlystore() { store i32 12345, i32* getelementptr ({ i32, [4 x float] }, { i32, [4 x float] }* @G, i32 0, i32 0) ret void