Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7793,6 +7793,68 @@
          isa<ConstantInt>(GEP->getOperand(1));
 }
 
+// Returns the alloc size of the type stepped over by the first index of GEP.
+// GEP must meet GEPSequentialConstIndexed requirements.
+// The result may be scalable or zero; callers must check before dividing.
+static TypeSize GEPSequentialConstIndexedTypeSize(GetElementPtrInst &GEP) {
+  const DataLayout &DL = GEP.getModule()->getDataLayout();
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  return DL.getTypeAllocSize(GTI.getIndexedType());
+}
+
+// Returns true if the address computed by Target can be re-expressed as a
+// constant-indexed GEP on Source using Source's element type, i.e. the element
+// types are identical or Target's element size is a whole multiple of
+// Source's.
+// Both GEPs must meet GEPSequentialConstIndexed requirements.
+static bool GEPIsAddressableFromSource(GetElementPtrInst &Target,
+                                       GetElementPtrInst &Source) {
+  if (Source.getSourceElementType() == Target.getSourceElementType())
+    return true;
+
+  TypeSize SourceTypeSize = GEPSequentialConstIndexedTypeSize(Source);
+  TypeSize TargetTypeSize = GEPSequentialConstIndexedTypeSize(Target);
+
+  // Scalable sizes are not compile-time constants, and a zero-sized source
+  // element would make the modulo below divide by zero.
+  if (SourceTypeSize.isScalable() || TargetTypeSize.isScalable() ||
+      SourceTypeSize.getFixedValue() == 0)
+    return false;
+
+  return TargetTypeSize.getFixedValue() % SourceTypeSize.getFixedValue() == 0;
+}
+
+// Calculates the index, in units of Source's element type, that addresses
+// Target's location when Source's result is used as the base pointer:
+//   TargetIdx * (sizeof(TargetTy) / sizeof(SourceTy)) - SourceIdx
+// Both GEPs must meet GEPSequentialConstIndexed requirements, have the same
+// index type, and satisfy GEPIsAddressableFromSource(Target, Source).
+static APInt GEPCalculateRelativeIndex(GetElementPtrInst &Target,
+                                       GetElementPtrInst &Source) {
+  uint64_t Scale = 1;
+
+  if (Source.getSourceElementType() != Target.getSourceElementType()) {
+    TypeSize SourceTypeSize = GEPSequentialConstIndexedTypeSize(Source);
+    TypeSize TargetTypeSize = GEPSequentialConstIndexedTypeSize(Target);
+    assert(!SourceTypeSize.isScalable() && !TargetTypeSize.isScalable() &&
+           "Scalable GEP element types cannot be rescaled");
+
+    uint64_t SourceSize = SourceTypeSize.getFixedValue();
+    uint64_t TargetSize = TargetTypeSize.getFixedValue();
+    assert(SourceSize != 0 &&
+           "Source GEP element type must not be zero-sized");
+
+    Scale = TargetSize / SourceSize;
+    assert(Scale * SourceSize == TargetSize &&
+           "Target GEP must be addressable from Source GEP");
+  }
+
+  ConstantInt *SourceIdx = cast<ConstantInt>(Source.getOperand(1));
+  ConstantInt *TargetIdx = cast<ConstantInt>(Target.getOperand(1));
+
+  return (TargetIdx->getValue() * Scale) - SourceIdx->getValue();
+}
+
 // Try unmerging GEPs to reduce liveness interference (register pressure) across
 // IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
 // reducing liveness interference across those edges benefits global register
@@ -7907,6 +7969,9 @@
     // up.
     if (!GEPSequentialConstIndexed(UGEPI))
       return false;
+    // Check if GEP Types match or if types are compatible
+    if (!GEPIsAddressableFromSource(*UGEPI, *GEPI))
+      return false;
     if (UGEPI->getOperand(0) != GEPIOp)
       return false;
     if (GEPIIdx->getType() !=
@@ -7923,8 +7988,7 @@
     return false;
   // Check the materializing cost of (Uidx-Idx).
   for (GetElementPtrInst *UGEPI : UGEPIs) {
-    ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
-    APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
+    APInt NewIdx = GEPCalculateRelativeIndex(*UGEPI, *GEPI);
     InstructionCost ImmCost = TTI->getIntImmCost(
         NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
     if (ImmCost > TargetTransformInfo::TCC_Basic)
@@ -7932,11 +7996,12 @@
   }
   // Now unmerge between GEPI and UGEPIs.
   for (GetElementPtrInst *UGEPI : UGEPIs) {
+    APInt NewIdx = GEPCalculateRelativeIndex(*UGEPI, *GEPI);
+    Constant *NewUGEPIIdx = ConstantInt::get(GEPIIdx->getType(), NewIdx);
     UGEPI->setOperand(0, GEPI);
-    ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
-    Constant *NewUGEPIIdx = ConstantInt::get(
-        GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
     UGEPI->setOperand(1, NewUGEPIIdx);
+    UGEPI->setSourceElementType(GEPI->getSourceElementType());
+    UGEPI->setResultElementType(GEPI->getResultElementType());
     // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
     // inbounds to avoid UB.
     if (!GEPI->isInBounds()) {
Index: llvm/test/CodeGen/Generic/indirect-br-gep-unmerge.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Generic/indirect-br-gep-unmerge.ll
@@ -0,0 +1,35 @@
+; RUN: llc %s -stop-after=codegenprepare -o - | FileCheck %s
+; REQUIRES: x86-registered-target
+
+; Check that CodeGenPrepare rewrites a GEP on %struct.Blub so that it is based
+; on the i8 GEP feeding the indirectbr block, rescaling the constant index:
+; %struct.Blub is 16 bytes, so element 1 of Blub is byte 16 from the base,
+; i.e. byte 15 from the i8 GEP at offset 1.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.Blub = type { i8, i8, ptr }
+
+@indirectBrPtr = external hidden global ptr
+
+define dso_local noundef ptr @testFunc(ptr noundef readonly %array, i1 %skip) {
+entry:
+  br i1 %skip, label %loopHeader, label %endBlock
+
+loopHeader:                                       ; preds = %loopFooter, %entry
+  %currentArrayElement = phi ptr [ %array, %entry ], [ %nextArrayElement, %loopFooter ]
+  %elementValuePtr = getelementptr inbounds i8, ptr %currentArrayElement, i64 1
+  %elementValue = load i8, ptr %elementValuePtr, align 1
+  indirectbr ptr @indirectBrPtr, [label %loopFooter, label %endBlock]
+
+loopFooter:
+  %isGoodValue = icmp eq i8 %elementValue, 0
+  ; CHECK: %nextArrayElement = getelementptr inbounds i8, ptr %elementValuePtr, i64 15
+  %nextArrayElement = getelementptr inbounds %struct.Blub, ptr %currentArrayElement, i64 1
+  br i1 %isGoodValue, label %loopHeader, label %endBlock
+
+endBlock:                                         ; preds = %loopFooter, %loopHeader, %entry
+  %retVal = phi ptr [ %array, %entry ], [ %elementValuePtr, %loopFooter ], [ %elementValuePtr, %loopHeader ]
+  ret ptr %retVal
+}