Index: ../docs/LangRef.rst
===================================================================
--- ../docs/LangRef.rst
+++ ../docs/LangRef.rst
@@ -1065,7 +1065,7 @@
 for return values.

 .. _attr_align:
-
+
 ``align <n>``
     This indicates that the pointer value may be assumed by the optimizer to
     have the specified alignment.
@@ -1908,10 +1908,12 @@
 ``A<address space>``
     Specifies the address space of objects created by '``alloca``'.
     Defaults to the default address space of 0.
-``p[n]:<size>:<abi>:<pref>``
+``p[n]:<size>:<abi>:<pref>:<idx>``
     This specifies the *size* of a pointer and its ``<abi>`` and
-    ``<pref>``\erred alignments for address space ``n``. All sizes are in
-    bits. The address space, ``n``, is optional, and if not specified,
+    ``<pref>``\erred alignments for address space ``n``. The fourth parameter
+    ``<idx>`` is the size of the index used for address calculation. If not
+    specified, the default index size is equal to the pointer size. All sizes
+    are in bits. The address space, ``n``, is optional, and if not specified,
     denotes the default address space 0. The value of ``n`` must be in
     the range [1,2^23).
 ``i<size>:<abi>:<pref>``
@@ -2281,7 +2283,7 @@
 LLVM IR floating-point operations (:ref:`fadd <i_fadd>`,
 :ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
 :ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`) and :ref:`call <i_call>`
-may use the following flags to enable otherwise unsafe
+may use the following flags to enable otherwise unsafe
 floating-point transformations.

 ``nnan``
@@ -2308,11 +2310,11 @@
 ``afn``
    Approximate functions - Allow substitution of approximate calculations for
-   functions (sin, log, sqrt, etc). See floating-point intrinsic definitions
-   for places where this can apply to LLVM's intrinsic math functions.
+   functions (sin, log, sqrt, etc). See floating-point intrinsic definitions
+   for places where this can apply to LLVM's intrinsic math functions.

 ``reassoc``
-   Allow reassociation transformations for floating-point instructions.
+   Allow reassociation transformations for floating-point instructions.
    This may dramatically change results in floating point.

 ``fast``
@@ -6853,10 +6855,10 @@
 Semantics:
 """"""""""

-Return the same value as a libm '``fmod``' function but without trapping or
+Return the same value as a libm '``fmod``' function but without trapping or
 setting ``errno``.

-The remainder has the same sign as the dividend. This instruction can also
+The remainder has the same sign as the dividend. This instruction can also
 take any number of :ref:`fast-math flags <fastmath>`, which are optimization
 hints to enable otherwise unsafe floating-point optimizations:
@@ -10504,7 +10506,7 @@
 """"""""""

 The '``llvm.memset.*``' intrinsics fill "len" bytes of memory starting
-at the destination location.
+at the destination location.

 '``llvm.sqrt.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10538,10 +10540,10 @@
 """"""""""

 Return the same value as a corresponding libm '``sqrt``' function but without
-trapping or setting ``errno``. For types specified by IEEE-754, the result
+trapping or setting ``errno``. For types specified by IEEE-754, the result
 matches a conforming libm implementation.

-When specified with the fast-math-flag 'afn', the result may be approximated
+When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.

 '``llvm.powi.*``' Intrinsic
@@ -10616,7 +10618,7 @@
 Return the same value as a corresponding libm '``sin``' function but without
 trapping or setting ``errno``.

-When specified with the fast-math-flag 'afn', the result may be approximated
+When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.

 '``llvm.cos.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10653,7 +10655,7 @@
 Return the same value as a corresponding libm '``cos``' function but without
 trapping or setting ``errno``.
-When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.pow.*``' Intrinsic @@ -10691,7 +10693,7 @@ Return the same value as a corresponding libm '``pow``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.exp.*``' Intrinsic @@ -10729,7 +10731,7 @@ Return the same value as a corresponding libm '``exp``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.exp2.*``' Intrinsic @@ -10767,7 +10769,7 @@ Return the same value as a corresponding libm '``exp2``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.log.*``' Intrinsic @@ -10805,7 +10807,7 @@ Return the same value as a corresponding libm '``log``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.log10.*``' Intrinsic @@ -10843,7 +10845,7 @@ Return the same value as a corresponding libm '``log10``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.log2.*``' Intrinsic @@ -10881,7 +10883,7 @@ Return the same value as a corresponding libm '``log2``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.fma.*``' Intrinsic @@ -10918,7 +10920,7 @@ Return the same value as a corresponding libm '``fma``' function but without trapping or setting ``errno``. -When specified with the fast-math-flag 'afn', the result may be approximated +When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. '``llvm.fabs.*``' Intrinsic @@ -14558,4 +14560,3 @@ is replaced with an actual element size. The optimizer is allowed to inline the memory assignment when it's profitable to do so. - Index: ../include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- ../include/llvm/CodeGen/BasicTTIImpl.h +++ ../include/llvm/CodeGen/BasicTTIImpl.h @@ -240,7 +240,7 @@ bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); // Early exit if both a jump table and bit test are not allowed. 
- if (N < 1 || (!IsJTAllowed && DL.getPointerSizeInBits() < N)) + if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N)) return N; APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); @@ -254,7 +254,7 @@ } // Check if suitable for a bit test - if (N <= DL.getPointerSizeInBits()) { + if (N <= DL.getIndexSizeInBits(0u)) { SmallPtrSet Dests; for (auto I : SI.cases()) Dests.insert(I.getCaseSuccessor()); Index: ../include/llvm/CodeGen/TargetLowering.h =================================================================== --- ../include/llvm/CodeGen/TargetLowering.h +++ ../include/llvm/CodeGen/TargetLowering.h @@ -812,7 +812,7 @@ bool rangeFitsInWord(const APInt &Low, const APInt &High, const DataLayout &DL) const { // FIXME: Using the pointer type doesn't seem ideal. - uint64_t BW = DL.getPointerSizeInBits(); + uint64_t BW = DL.getIndexSizeInBits(0u); uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; return Range <= BW; } Index: ../include/llvm/IR/DataLayout.h =================================================================== --- ../include/llvm/IR/DataLayout.h +++ ../include/llvm/IR/DataLayout.h @@ -92,10 +92,12 @@ unsigned PrefAlign; uint32_t TypeByteWidth; uint32_t AddressSpace; + uint32_t IndexWidth; /// Initializer static PointerAlignElem get(uint32_t AddressSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth); + unsigned PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth); bool operator==(const PointerAlignElem &rhs) const; }; @@ -165,7 +167,8 @@ unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width, bool ABIAlign, Type *Ty) const; void setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth); + unsigned PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth); /// Internal helper method that returns requested alignment for type. unsigned getAlignment(Type *Ty, bool abi_or_pref) const; @@ -321,6 +324,9 @@ /// the backends/clients are updated. unsigned getPointerSize(unsigned AS = 0) const; + // Index size used for address calculation. + unsigned getIndexSize(unsigned AS) const; + /// Return the address spaces containing non-integral pointers. Pointers in /// this address space don't have a well-defined bitwise representation. ArrayRef getNonIntegralAddressSpaces() const { @@ -345,6 +351,11 @@ return getPointerSize(AS) * 8; } + /// Size in bits of index used for address calculation in getelementptr. + unsigned getIndexSizeInBits(unsigned AS) const { + return getIndexSize(AS) * 8; + } + /// Layout pointer size, in bits, based on the type. If this function is /// called with a pointer type, then the type size of the pointer is returned. /// If this function is called with a vector of pointers, then the type size @@ -352,6 +363,10 @@ /// vector of pointers. unsigned getPointerTypeSizeInBits(Type *) const; + /// Layout size of the index used in GEP calculation. + /// The function should be called with pointer or vector of pointers type. + unsigned getIndexTypeSizeInBits(Type *Ty) const; + unsigned getPointerTypeSize(Type *Ty) const { return getPointerTypeSizeInBits(Ty) / 8; } @@ -453,6 +468,11 @@ /// are set. unsigned getLargestLegalIntTypeSizeInBits() const; + /// \brief Returns the type of a GEP index. + /// If it was not specified explicitly, it will be the integer type of the + /// pointer width - IntPtrType. + Type *getIndexType(Type *PtrTy) const; + /// \brief Returns the offset from the beginning of the type for the specified /// indices. 
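A minimal sketch, not part of the patch, of how the queries declared above behave, assuming an LLVM build that already contains this change; the 40-bit-pointer / 32-bit-index layout string mirrors the one used by the new tests further down:

    // Sketch only: exercises getIndexSizeInBits, getIndexTypeSizeInBits and
    // getIndexType for a "p:40:64:64:32" layout (40-bit pointers whose GEP
    // offsets are computed in 32 bits).
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/Casting.h"
    #include <cassert>

    void indexQueries(llvm::LLVMContext &Ctx) {
      llvm::DataLayout DL("e-p:40:64:64:32-i32:32-n32");

      // The stored pointer is 40 bits wide, but offset arithmetic uses 32 bits.
      assert(DL.getPointerSizeInBits(0) == 40);
      assert(DL.getIndexSizeInBits(0) == 32);

      llvm::Type *I8Ptr = llvm::Type::getInt8PtrTy(Ctx);
      llvm::Type *I8PtrVec = llvm::VectorType::get(I8Ptr, 4);

      // getIndexType mirrors the shape of its argument: a scalar pointer
      // yields i32, a vector of pointers yields a vector of i32.
      assert(DL.getIndexTypeSizeInBits(I8Ptr) == 32);
      assert(DL.getIndexType(I8Ptr)->isIntegerTy(32));
      assert(llvm::cast<llvm::VectorType>(DL.getIndexType(I8PtrVec))
                 ->getElementType()
                 ->isIntegerTy(32));
    }

This is the distinction the rest of the patch relies on: storage size and alignment of a pointer stay tied to ``getPointerSize*``, while anything that feeds address arithmetic switches to the ``getIndex*`` family.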
/// Index: ../lib/Analysis/ConstantFolding.cpp =================================================================== --- ../lib/Analysis/ConstantFolding.cpp +++ ../lib/Analysis/ConstantFolding.cpp @@ -286,7 +286,7 @@ APInt &Offset, const DataLayout &DL) { // Trivial case, constant is the global. if ((GV = dyn_cast(C))) { - unsigned BitWidth = DL.getPointerTypeSizeInBits(GV->getType()); + unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType()); Offset = APInt(BitWidth, 0); return true; } @@ -305,7 +305,7 @@ if (!GEP) return false; - unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType()); + unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); APInt TmpOffset(BitWidth, 0); // If the base isn't a global+constant, we aren't either. @@ -808,26 +808,26 @@ // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' for (unsigned i = 1, e = Ops.size(); i != e; ++i) - if (!isa(Ops[i])) { + if (!isa(Ops[i])) { - // If this is "gep i8* Ptr, (sub 0, V)", fold this as: - // "inttoptr (sub (ptrtoint Ptr), V)" - if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) { - auto *CE = dyn_cast(Ops[1]); - assert((!CE || CE->getType() == IntPtrTy) && - "CastGEPIndices didn't canonicalize index types!"); - if (CE && CE->getOpcode() == Instruction::Sub && - CE->getOperand(0)->isNullValue()) { - Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); - Res = ConstantExpr::getSub(Res, CE->getOperand(1)); - Res = ConstantExpr::getIntToPtr(Res, ResTy); - if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI)) - Res = FoldedRes; - return Res; + // If this is "gep i8* Ptr, (sub 0, V)", fold this as: + // "inttoptr (sub (ptrtoint Ptr), V)" + if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) { + auto *CE = dyn_cast(Ops[1]); + assert((!CE || CE->getType() == IntPtrTy) && + "CastGEPIndices didn't canonicalize index types!"); + if (CE && CE->getOpcode() == Instruction::Sub && + CE->getOperand(0)->isNullValue()) { + Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); + Res = ConstantExpr::getSub(Res, CE->getOperand(1)); + Res = ConstantExpr::getIntToPtr(Res, ResTy); + if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI)) + Res = FoldedRes; + return Res; + } } + return nullptr; } - return nullptr; - } unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy); APInt Offset = Index: ../lib/Analysis/InlineCost.cpp =================================================================== --- ../lib/Analysis/InlineCost.cpp +++ ../lib/Analysis/InlineCost.cpp @@ -372,7 +372,7 @@ /// Returns false if unable to compute the offset for any reason. Respects any /// simplified values known during the analysis of this callsite. 
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - unsigned IntPtrWidth = DL.getPointerTypeSizeInBits(GEP.getType()); + unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType()); assert(IntPtrWidth == Offset.getBitWidth()); for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); @@ -1619,7 +1619,7 @@ return nullptr; unsigned AS = V->getType()->getPointerAddressSpace(); - unsigned IntPtrWidth = DL.getPointerSizeInBits(AS); + unsigned IntPtrWidth = DL.getIndexSizeInBits(AS); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an Index: ../lib/Analysis/InstructionSimplify.cpp =================================================================== --- ../lib/Analysis/InstructionSimplify.cpp +++ ../lib/Analysis/InstructionSimplify.cpp @@ -3762,7 +3762,7 @@ // The following transforms are only safe if the ptrtoint cast // doesn't truncate the pointers. if (Ops[1]->getType()->getScalarSizeInBits() == - Q.DL.getPointerSizeInBits(AS)) { + Q.DL.getIndexSizeInBits(AS)) { auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { if (match(P, m_Zero())) return Constant::getNullValue(GEPTy); @@ -3802,10 +3802,10 @@ if (Q.DL.getTypeAllocSize(LastType) == 1 && all_of(Ops.slice(1).drop_back(1), [](Value *Idx) { return match(Idx, m_Zero()); })) { - unsigned PtrWidth = - Q.DL.getPointerSizeInBits(Ops[0]->getType()->getPointerAddressSpace()); - if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == PtrWidth) { - APInt BasePtrOffset(PtrWidth, 0); + unsigned IdxWidth = + Q.DL.getIndexSizeInBits(Ops[0]->getType()->getPointerAddressSpace()); + if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == IdxWidth) { + APInt BasePtrOffset(IdxWidth, 0); Value *StrippedBasePtr = Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL, BasePtrOffset); Index: ../lib/Analysis/Loads.cpp =================================================================== --- ../lib/Analysis/Loads.cpp +++ ../lib/Analysis/Loads.cpp @@ -80,7 +80,7 @@ if (const GEPOperator *GEP = dyn_cast(V)) { const Value *Base = GEP->getPointerOperand(); - APInt Offset(DL.getPointerTypeSizeInBits(GEP->getType()), 0); + APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() || !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue()) return false; @@ -146,7 +146,7 @@ SmallPtrSet Visited; return ::isDereferenceableAndAlignedPointer( - V, Align, APInt(DL.getTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL, + V, Align, APInt(DL.getIndexTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL, CtxI, DT, Visited); } Index: ../lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- ../lib/Analysis/LoopAccessAnalysis.cpp +++ ../lib/Analysis/LoopAccessAnalysis.cpp @@ -1127,11 +1127,11 @@ if (CheckType && PtrA->getType() != PtrB->getType()) return false; - unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); + unsigned IdxWidth = DL.getIndexSizeInBits(ASA); Type *Ty = cast(PtrA->getType())->getElementType(); - APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty)); + APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); - APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); + APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); Index: ../lib/Analysis/ScalarEvolution.cpp 
=================================================================== --- ../lib/Analysis/ScalarEvolution.cpp +++ ../lib/Analysis/ScalarEvolution.cpp @@ -3672,6 +3672,8 @@ /// return true. uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); + if (Ty->isPointerTy()) + return getDataLayout().getIndexTypeSizeInBits(Ty); return getDataLayout().getTypeSizeInBits(Ty); } Index: ../lib/Analysis/ValueTracking.cpp =================================================================== --- ../lib/Analysis/ValueTracking.cpp +++ ../lib/Analysis/ValueTracking.cpp @@ -89,7 +89,7 @@ if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; - return DL.getPointerTypeSizeInBits(Ty); + return DL.getIndexTypeSizeInBits(Ty); } namespace { @@ -1101,7 +1101,10 @@ unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType()); + Type *ScalarTy = SrcTy->getScalarType(); + SrcBitWidth = ScalarTy->isPointerTy() ? + Q.DL.getIndexTypeSizeInBits(ScalarTy) : + Q.DL.getTypeSizeInBits(ScalarTy); assert(SrcBitWidth && "SrcBitWidth can't be zero"); Known = Known.zextOrTrunc(SrcBitWidth); @@ -1555,9 +1558,13 @@ assert((V->getType()->isIntOrIntVectorTy(BitWidth) || V->getType()->isPtrOrPtrVectorTy()) && "Not integer or pointer type!"); - assert(Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth && - "V and Known should have same BitWidth"); + + Type *ScalarTy = V->getType()->getScalarType(); + unsigned ExpectedWidth = ScalarTy->isPointerTy() ? + Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); + assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth"); (void)BitWidth; + (void)ExpectedWidth; const APInt *C; if (match(V, m_APInt(C))) { @@ -2194,7 +2201,11 @@ // in V, so for undef we have to conservatively return 1. We don't have the // same behavior for poison though -- that's a FIXME today. - unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType()); + Type *ScalarTy = V->getType()->getScalarType(); + unsigned TyBits = ScalarTy->isPointerTy() ? + Q.DL.getIndexTypeSizeInBits(ScalarTy) : + Q.DL.getTypeSizeInBits(ScalarTy); + unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -3091,7 +3102,7 @@ /// pointer plus a constant offset. Return the base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL) { - unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType()); + unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType()); APInt ByteOffset(BitWidth, 0); // We walk up the defs but use a visited set to handle unreachable code. In @@ -3109,7 +3120,7 @@ // means when we construct GEPOffset, we need to use the size // of GEP's pointer type rather than the size of the original // pointer type. - APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0); + APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); if (!GEP->accumulateConstantOffset(DL, GEPOffset)) break; Index: ../lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- ../lib/CodeGen/CodeGenPrepare.cpp +++ ../lib/CodeGen/CodeGenPrepare.cpp @@ -1581,7 +1581,7 @@ // if size - offset meets the size threshold. 
if (!Arg->getType()->isPointerTy()) continue; - APInt Offset(DL->getPointerSizeInBits( + APInt Offset(DL->getIndexSizeInBits( cast(Arg->getType())->getAddressSpace()), 0); Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); Index: ../lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- ../lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ ../lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7988,8 +7988,8 @@ const GlobalValue *GV; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); - KnownBits Known(PtrWidth); + unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); + KnownBits Known(IdxWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; Index: ../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- ../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ ../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3424,10 +3424,9 @@ DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { - MVT PtrTy = - DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); - unsigned PtrSize = PtrTy.getSizeInBits(); - APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType())); + unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); + MVT IdxTy = MVT::getIntegerVT(IdxSize); + APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType())); // If this is a scalar constant or a splat vector of constants, // handle it quickly. @@ -3439,11 +3438,11 @@ if (CI) { if (CI->isZero()) continue; - APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); LLVMContext &Context = *DAG.getContext(); SDValue OffsVal = VectorWidth ? - DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) : - DAG.getConstant(Offs, dl, PtrTy); + DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : + DAG.getConstant(Offs, dl, IdxTy); // In an inbouds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. 
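The call sites updated above (ConstantFolding, InlineCost, Loads, CodeGenPrepare, SelectionDAG) all follow the same convention: the scratch APInt used to accumulate a constant GEP offset is sized by the index width of the pointer, because the assertions in Operator.cpp and Value.cpp below now check exactly that width. A minimal sketch of the pattern; ``baseAndOffset`` is a hypothetical helper for illustration, not an API added by this patch:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Value.h"

    llvm::Value *baseAndOffset(llvm::Value *Ptr, const llvm::DataLayout &DL,
                               llvm::APInt &Offset) {
      // Ptr must have pointer (or vector-of-pointer) type. The APInt is sized
      // by the index width, not the pointer width, or the callee asserts.
      unsigned IdxWidth = DL.getIndexTypeSizeInBits(Ptr->getType());
      Offset = llvm::APInt(IdxWidth, 0);
      // Strips inbounds constant GEPs, adding each constant offset into Offset.
      return Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
    }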
Index: ../lib/IR/DataLayout.cpp =================================================================== --- ../lib/IR/DataLayout.cpp +++ ../lib/IR/DataLayout.cpp @@ -129,13 +129,15 @@ PointerAlignElem PointerAlignElem::get(uint32_t AddressSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth) { + unsigned PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth) { assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!"); PointerAlignElem retval; retval.AddressSpace = AddressSpace; retval.ABIAlign = ABIAlign; retval.PrefAlign = PrefAlign; retval.TypeByteWidth = TypeByteWidth; + retval.IndexWidth = IndexWidth; return retval; } @@ -144,7 +146,8 @@ return (ABIAlign == rhs.ABIAlign && AddressSpace == rhs.AddressSpace && PrefAlign == rhs.PrefAlign - && TypeByteWidth == rhs.TypeByteWidth); + && TypeByteWidth == rhs.TypeByteWidth + && IndexWidth == rhs.IndexWidth); } //===----------------------------------------------------------------------===// @@ -189,7 +192,7 @@ setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, E.PrefAlign, E.TypeBitWidth); } - setPointerAlignment(0, 8, 8, 8); + setPointerAlignment(0, 8, 8, 8, 8); parseSpecifier(Desc); } @@ -287,6 +290,10 @@ report_fatal_error( "Pointer ABI alignment must be a power of 2"); + // Size of index used in GEP for address calculation. + // The parameter is optional. By default it is equal to size of pointer. + unsigned IndexSize = PointerMemSize; + // Preferred alignment. unsigned PointerPrefAlign = PointerABIAlign; if (!Rest.empty()) { @@ -295,10 +302,17 @@ if (!isPowerOf2_64(PointerPrefAlign)) report_fatal_error( "Pointer preferred alignment must be a power of 2"); - } + // Now read the index. It is the second optional parameter here. + if (!Rest.empty()) { + Split = split(Rest, ':'); + IndexSize = inBytes(getInt(Tok)); + if (!IndexSize) + report_fatal_error("Invalid index size of 0 bytes"); + } + } setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign, - PointerMemSize); + PointerMemSize, IndexSize); break; } case 'i': @@ -467,8 +481,8 @@ } void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, - unsigned PrefAlign, - uint32_t TypeByteWidth) { + unsigned PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth) { if (PrefAlign < ABIAlign) report_fatal_error( "Preferred alignment cannot be less than the ABI alignment"); @@ -476,11 +490,12 @@ PointersTy::iterator I = findPointerLowerBound(AddrSpace); if (I == Pointers.end() || I->AddressSpace != AddrSpace) { Pointers.insert(I, PointerAlignElem::get(AddrSpace, ABIAlign, PrefAlign, - TypeByteWidth)); + TypeByteWidth, IndexWidth)); } else { I->ABIAlign = ABIAlign; I->PrefAlign = PrefAlign; I->TypeByteWidth = TypeByteWidth; + I->IndexWidth = IndexWidth; } } @@ -618,6 +633,22 @@ return getPointerSizeInBits(cast(Ty)->getAddressSpace()); } +unsigned DataLayout::getIndexSize(unsigned AS) const { + PointersTy::const_iterator I = findPointerLowerBound(AS); + if (I == Pointers.end() || I->AddressSpace != AS) { + I = findPointerLowerBound(0); + assert(I->AddressSpace == 0); + } + return I->IndexWidth; +} + +unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const { + assert(Ty->isPtrOrPtrVectorTy() && + "This should only be called with a pointer or pointer vector type"); + Ty = Ty->getScalarType(); + return getIndexSizeInBits(cast(Ty)->getAddressSpace()); +} + /*! \param abi_or_pref Flag that determines which alignment is returned. true returns the ABI alignment, false returns the preferred alignment. 
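The parser above treats the fourth "p" field as optional: when it is omitted the index width defaults to the pointer width, and an address space without its own entry falls back to address space 0. A small sketch of that behaviour, assuming a build containing this change (the layout string is only illustrative):

    // Sketch only: byte-based getters and defaulting of the index field.
    #include "llvm/IR/DataLayout.h"
    #include <cassert>

    void indexDefaults() {
      // AS 0: 40-bit pointers with 32-bit indices.
      // AS 2: 64-bit pointers with no explicit index width.
      llvm::DataLayout DL("e-p:40:64:64:32-p2:64:64:64");

      assert(DL.getPointerSize(0) == 5 && DL.getIndexSize(0) == 4); // bytes
      // Omitted index width defaults to the pointer width.
      assert(DL.getPointerSize(2) == 8 && DL.getIndexSize(2) == 8);
      // An address space with no entry of its own falls back to AS 0.
      assert(DL.getIndexSize(7) == 4);
    }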
@@ -701,13 +732,13 @@ IntegerType *DataLayout::getIntPtrType(LLVMContext &C, unsigned AddressSpace) const { - return IntegerType::get(C, getPointerSizeInBits(AddressSpace)); + return IntegerType::get(C, getIndexSizeInBits(AddressSpace)); } Type *DataLayout::getIntPtrType(Type *Ty) const { assert(Ty->isPtrOrPtrVectorTy() && "Expected a pointer or pointer vector type."); - unsigned NumBits = getPointerTypeSizeInBits(Ty); + unsigned NumBits = getIndexTypeSizeInBits(Ty); IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits); if (VectorType *VecTy = dyn_cast(Ty)) return VectorType::get(IntTy, VecTy->getNumElements()); @@ -726,6 +757,16 @@ return Max != LegalIntWidths.end() ? *Max : 0; } +Type *DataLayout::getIndexType(Type *Ty) const { + assert(Ty->isPtrOrPtrVectorTy() && + "Expected a pointer or pointer vector type."); + unsigned NumBits = getIndexTypeSizeInBits(Ty); + IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits); + if (VectorType *VecTy = dyn_cast(Ty)) + return VectorType::get(IntTy, VecTy->getNumElements()); + return IntTy; +} + int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy, ArrayRef Indices) const { int64_t Result = 0; Index: ../lib/IR/Operator.cpp =================================================================== --- ../lib/IR/Operator.cpp +++ ../lib/IR/Operator.cpp @@ -35,8 +35,8 @@ bool GEPOperator::accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const { assert(Offset.getBitWidth() == - DL.getPointerSizeInBits(getPointerAddressSpace()) && - "The offset must have exactly as many bits as our pointer."); + DL.getIndexSizeInBits(getPointerAddressSpace()) && + "The offset bit width does not match DL specification."); for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this); GTI != GTE; ++GTI) { Index: ../lib/IR/Value.cpp =================================================================== --- ../lib/IR/Value.cpp +++ ../lib/IR/Value.cpp @@ -587,9 +587,9 @@ if (!getType()->isPointerTy()) return this; - assert(Offset.getBitWidth() == DL.getPointerSizeInBits(cast( + assert(Offset.getBitWidth() == DL.getIndexSizeInBits(cast( getType())->getAddressSpace()) && - "The offset must have exactly as many bits as our pointer."); + "The offset bit width does not match the DL specification."); // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. Index: ../lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- ../lib/Transforms/InstCombine/InstCombineCasts.cpp +++ ../lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1761,7 +1761,7 @@ Type *Ty = CI.getType(); unsigned AS = CI.getPointerAddressSpace(); - if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS)) + if (Ty->getScalarSizeInBits() == DL.getIndexSizeInBits(AS)) return commonPointerCastTransforms(CI); Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS); @@ -2014,13 +2014,13 @@ !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) || !BO->isBitwiseLogicOp()) return nullptr; - + // FIXME: This transform is restricted to vector types to avoid backend // problems caused by creating potentially illegal operations. If a fix-up is // added to handle that situation, we can remove this check. 
if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy()) return nullptr; - + Value *X; if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy && !isa(X)) { Index: ../lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- ../lib/Transforms/InstCombine/InstCombineCompares.cpp +++ ../lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -682,7 +682,7 @@ // 4. Emit GEPs to get the original pointers. // 5. Remove the original instructions. Type *IndexType = IntegerType::get( - Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType())); + Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType())); DenseMap NewInsts; NewInsts[Base] = ConstantInt::getNullValue(IndexType); @@ -790,7 +790,7 @@ static std::pair getAsConstantIndexedAddress(Value *V, const DataLayout &DL) { Type *IndexType = IntegerType::get(V->getContext(), - DL.getPointerTypeSizeInBits(V->getType())); + DL.getIndexTypeSizeInBits(V->getType())); Constant *Index = ConstantInt::getNullValue(IndexType); while (true) { @@ -4031,7 +4031,7 @@ // Get scalar or pointer size. unsigned BitWidth = Ty->isIntOrIntVectorTy() ? Ty->getScalarSizeInBits() - : DL.getTypeSizeInBits(Ty->getScalarType()); + : DL.getIndexTypeSizeInBits(Ty->getScalarType()); if (!BitWidth) return nullptr; Index: ../lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- ../lib/Transforms/InstCombine/InstructionCombining.cpp +++ ../lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1115,7 +1115,7 @@ // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - Type *IntPtrTy = DL.getIntPtrType(PtrTy); + Type *IndexTy = DL.getIndexType(PtrTy); int64_t FirstIdx = 0; if (int64_t TySize = DL.getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -1130,7 +1130,7 @@ assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); } - NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); + NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx)); // Index into the types. If we fail, set OrigBase to null. while (Offset) { @@ -1152,7 +1152,7 @@ } else if (ArrayType *AT = dyn_cast(Ty)) { uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); - NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); + NewIndices.push_back(ConstantInt::get(IndexTy,Offset/EltSize)); Offset %= EltSize; Ty = AT->getElementType(); } else { @@ -1515,8 +1515,11 @@ // Eliminate unneeded casts for indices, and replace indices which displace // by multiples of a zero size type with zero. bool MadeChange = false; - Type *IntPtrTy = - DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType()); + + // Index width may not be the same width as pointer width. + // Data layout chooses the right type based on supported integer types. + Type *NewScalarIndexTy = + DL.getIndexType(GEP.getPointerOperandType()->getScalarType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; @@ -1525,10 +1528,11 @@ if (GTI.isStruct()) continue; - // Index type should have the same width as IntPtr Type *IndexTy = (*I)->getType(); - Type *NewIndexType = IndexTy->isVectorTy() ? 
- VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy; + Type *NewIndexType = + IndexTy->isVectorTy() + ? VectorType::get(NewScalarIndexTy, IndexTy->getVectorNumElements()) + : NewScalarIndexTy; // If the element type has zero size then any index over it is equivalent // to an index of zero, so replace it with zero if it is not zero already. @@ -1731,7 +1735,7 @@ if (GEP.getNumIndices() == 1) { unsigned AS = GEP.getPointerAddressSpace(); if (GEP.getOperand(1)->getType()->getScalarSizeInBits() == - DL.getPointerSizeInBits(AS)) { + DL.getIndexSizeInBits(AS)) { Type *Ty = GEP.getSourceElementType(); uint64_t TyAllocSize = DL.getTypeAllocSize(Ty); @@ -1857,7 +1861,7 @@ if (SrcElTy->isArrayTy() && DL.getTypeAllocSize(SrcElTy->getArrayElementType()) == DL.getTypeAllocSize(ResElTy)) { - Type *IdxType = DL.getIntPtrType(GEP.getType()); + Type *IdxType = DL.getIndexType(GEP.getType()); Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() @@ -1884,10 +1888,11 @@ unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); uint64_t Scale = SrcSize / ResSize; - // Earlier transforms ensure that the index has type IntPtrType, which - // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) && - "Index not cast to pointer width?"); + // Earlier transforms ensure that the index has the right type + // according to Data Layout, which considerably simplifies the + // logic by eliminating implicit casts. + assert(Idx->getType() == DL.getIndexType(GEP.getType()) && + "Index type does not match the Data Layout preferences"); bool NSW; if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) { @@ -1923,19 +1928,19 @@ unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); uint64_t Scale = ArrayEltSize / ResSize; - // Earlier transforms ensure that the index has type IntPtrType, which - // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) && - "Index not cast to pointer width?"); + // Earlier transforms ensure that the index has the right type + // according to the Data Layout, which considerably simplifies + // the logic by eliminating implicit casts. + assert(Idx->getType() == DL.getIndexType(GEP.getType()) && + "Index type does not match the Data Layout preferences"); bool NSW; if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) { // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". - Value *Off[2] = { - Constant::getNullValue(DL.getIntPtrType(GEP.getType())), - NewIdx}; + Type *IndTy = DL.getIndexType(GEP.getType()); + Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx}; Value *NewGEP = GEP.isInBounds() && NSW ? 
Builder.CreateInBoundsGEP( @@ -1971,7 +1976,7 @@ if (BitCastInst *BCI = dyn_cast(PtrOp)) { Value *Operand = BCI->getOperand(0); PointerType *OpType = cast(Operand->getType()); - unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType()); + unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType()); APInt Offset(OffsetBits, 0); if (!isa(Operand) && GEP.accumulateConstantOffset(DL, Offset)) { @@ -2020,16 +2025,16 @@ } if (!GEP.isInBounds()) { - unsigned PtrWidth = - DL.getPointerSizeInBits(PtrOp->getType()->getPointerAddressSpace()); - APInt BasePtrOffset(PtrWidth, 0); + unsigned IdxWidth = + DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace()); + APInt BasePtrOffset(IdxWidth, 0); Value *UnderlyingPtrOp = PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset); if (auto *AI = dyn_cast(UnderlyingPtrOp)) { if (GEP.accumulateConstantOffset(DL, BasePtrOffset) && BasePtrOffset.isNonNegative()) { - APInt AllocSize(PtrWidth, DL.getTypeAllocSize(AI->getAllocatedType())); + APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType())); if (BasePtrOffset.ule(AllocSize)) { return GetElementPtrInst::CreateInBounds( PtrOp, makeArrayRef(Ops).slice(1), GEP.getName()); Index: ../lib/Transforms/Scalar/SROA.cpp =================================================================== --- ../lib/Transforms/Scalar/SROA.cpp +++ ../lib/Transforms/Scalar/SROA.cpp @@ -3648,7 +3648,7 @@ auto *PartPtrTy = PartTy->getPointerTo(AS); LoadInst *PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, BasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); @@ -3704,7 +3704,7 @@ StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), PartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); @@ -3786,7 +3786,7 @@ auto AS = LI->getPointerAddressSpace(); PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); @@ -3798,7 +3798,7 @@ StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), StorePartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); Index: ../lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp =================================================================== --- ../lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ ../lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1295,7 +1295,7 @@ // We changed p+o+c to p+c+o, p+c may not be inbound anymore. 
const DataLayout &DAL = First->getModule()->getDataLayout(); - APInt Offset(DAL.getPointerSizeInBits( + APInt Offset(DAL.getIndexSizeInBits( cast(First->getType())->getAddressSpace()), 0); Value *NewBase = Index: ../lib/Transforms/Utils/Local.cpp =================================================================== --- ../lib/Transforms/Utils/Local.cpp +++ ../lib/Transforms/Utils/Local.cpp @@ -1527,7 +1527,7 @@ } } else if (auto *GEP = dyn_cast(&I)) { unsigned BitWidth = - M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); + M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace()); // Rewrite a constant GEP into a DIExpression. Since we are performing // arithmetic to compute the variable's *value* in the DIExpression, we // need to mark the expression with a DW_OP_stack_value. @@ -2123,7 +2123,7 @@ if (!NewTy->isPointerTy()) return; - unsigned BitWidth = DL.getTypeSizeInBits(NewTy); + unsigned BitWidth = DL.getIndexTypeSizeInBits(NewTy); if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { MDNode *NN = MDNode::get(OldLI.getContext(), None); NewLI.setMetadata(LLVMContext::MD_nonnull, NN); Index: ../lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- ../lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ ../lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -323,7 +323,8 @@ APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy)); - APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); + unsigned IdxWidth = DL.getIndexSizeInBits(ASA); + APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); Index: ../test/Transforms/InstCombine/gep-custom-dl.ll =================================================================== --- ../test/Transforms/InstCombine/gep-custom-dl.ll +++ ../test/Transforms/InstCombine/gep-custom-dl.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-m:m-p:40:64:64:32-i32:32-i16:16-i8:8-n32" + +%struct.B = type { double } +%struct.A = type { %struct.B, i32, i32 } +%struct.C = type { [7 x i8] } + + +@Global = constant [10 x i8] c"helloworld" + + +; Test that two array indexing geps fold +define i32* @test1(i32* %I) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[B:%.*]] = getelementptr i32, i32* [[I:%.*]], i32 21 +; CHECK-NEXT: ret i32* [[B]] +; + %A = getelementptr i32, i32* %I, i8 17 + %B = getelementptr i32, i32* %A, i16 4 + ret i32* %B +} + +; Test that two getelementptr insts fold +define i32* @test2({ i32 }* %I) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[B:%.*]] = getelementptr { i32 }, { i32 }* [[I:%.*]], i32 1, i32 0 +; CHECK-NEXT: ret i32* [[B]] +; + %A = getelementptr { i32 }, { i32 }* %I, i32 1 + %B = getelementptr { i32 }, { i32 }* %A, i32 0, i32 0 + ret i32* %B +} + +define void @test3(i8 %B) { +; This should be turned into a constexpr instead of being an instruction +; CHECK-LABEL: @test3( +; CHECK-NEXT: store i8 [[B:%.*]], i8* getelementptr inbounds ([10 x i8], [10 x i8]* @Global, i32 0, i32 4), align 1 +; CHECK-NEXT: ret void +; + %A = getelementptr [10 x i8], [10 x i8]* @Global, i32 0, i32 4 + store i8 %B, i8* %A + ret void +} + +%as1_ptr_struct = type { i32 addrspace(1)* } +%as2_ptr_struct = type { i32 addrspace(2)* } + +@global_as2 = addrspace(2) global i32 zeroinitializer +@global_as1_as2_ptr = addrspace(1) 
global %as2_ptr_struct { i32 addrspace(2)* @global_as2 } + +; This should be turned into a constexpr instead of being an instruction +define void @test_evaluate_gep_nested_as_ptrs(i32 addrspace(2)* %B) { +; CHECK-LABEL: @test_evaluate_gep_nested_as_ptrs( +; CHECK-NEXT: store i32 addrspace(2)* [[B:%.*]], i32 addrspace(2)* addrspace(1)* getelementptr inbounds (%as2_ptr_struct, [[AS2_PTR_STRUCT:%.*]] addrspace(1)* @global_as1_as2_ptr, i32 0, i32 0), align 8 +; CHECK-NEXT: ret void +; + %A = getelementptr %as2_ptr_struct, %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i32 0, i32 0 + store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* %A + ret void +} + +@arst = addrspace(1) global [4 x i8 addrspace(2)*] zeroinitializer + +define void @test_evaluate_gep_as_ptrs_array(i8 addrspace(2)* %B) { +; CHECK-LABEL: @test_evaluate_gep_as_ptrs_array( +; CHECK-NEXT: store i8 addrspace(2)* [[B:%.*]], i8 addrspace(2)* addrspace(1)* getelementptr inbounds ([4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i32 0, i32 2), align 16 +; CHECK-NEXT: ret void +; + + %A = getelementptr [4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2 + store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* %A + ret void +} + +define i32* @test4(i32* %I, i32 %C, i32 %D) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[A:%.*]] = getelementptr i32, i32* [[I:%.*]], i32 [[C:%.*]] +; CHECK-NEXT: [[B:%.*]] = getelementptr i32, i32* [[A]], i32 [[D:%.*]] +; CHECK-NEXT: ret i32* [[B]] +; + %A = getelementptr i32, i32* %I, i32 %C + %B = getelementptr i32, i32* %A, i32 %D + ret i32* %B +} + + +define i1 @test5({ i32, i32 }* %x, { i32, i32 }* %y) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: [[TMP_4:%.*]] = icmp eq { i32, i32 }* [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[TMP_4]] +; + %tmp.1 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1 + %tmp.3 = getelementptr { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1 + ;; seteq x, y + %tmp.4 = icmp eq i32* %tmp.1, %tmp.3 + ret i1 %tmp.4 +} + +%S = type { i32, [ 100 x i32] } + +define <2 x i1> @test6(<2 x i32> %X, <2 x %S*> %P) nounwind { +; CHECK-LABEL: @test6( +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = getelementptr inbounds %S, <2 x %S*> %P, <2 x i32> zeroinitializer, <2 x i32> , <2 x i32> %X + %B = getelementptr inbounds %S, <2 x %S*> %P, <2 x i32> , <2 x i32> + %C = icmp eq <2 x i32*> %A, %B + ret <2 x i1> %C +} + +@G = external global [3 x i8] +define i8* @test7(i16 %Idx) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[ZE_IDX:%.*]] = zext i16 [[IDX:%.*]] to i32 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [3 x i8], [3 x i8]* @G, i32 0, i32 [[ZE_IDX]] +; CHECK-NEXT: ret i8* [[TMP]] +; + %ZE_Idx = zext i16 %Idx to i32 + %tmp = getelementptr i8, i8* getelementptr ([3 x i8], [3 x i8]* @G, i32 0, i32 0), i32 %ZE_Idx + ret i8* %tmp +} + + +; Test folding of constantexpr geps into normal geps. 
+@Array = external global [40 x i32] +define i32 *@test8(i32 %X) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: [[A:%.*]] = getelementptr [40 x i32], [40 x i32]* @Array, i32 0, i32 [[X:%.*]] +; CHECK-NEXT: ret i32* [[A]] +; + %A = getelementptr i32, i32* getelementptr ([40 x i32], [40 x i32]* @Array, i32 0, i32 0), i32 %X + ret i32* %A +} + +define i32 *@test9(i32 *%base, i8 %ind) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[IND:%.*]] to i32 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[TMP1]] +; CHECK-NEXT: ret i32* [[RES]] +; + %res = getelementptr i32, i32 *%base, i8 %ind + ret i32* %res +} + +define i32 @test10() { +; CHECK-LABEL: @test10( +; CHECK-NEXT: ret i32 8 +; + %A = getelementptr { i32, double }, { i32, double }* null, i32 0, i32 1 + %B = ptrtoint double* %A to i32 + ret i32 %B +} Index: ../test/Transforms/InstCombine/icmp-custom-dl.ll =================================================================== --- ../test/Transforms/InstCombine/icmp-custom-dl.ll +++ ../test/Transforms/InstCombine/icmp-custom-dl.ll @@ -0,0 +1,247 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:40:64:64:32-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +declare i32 @test58_d(i64 ) + +define i1 @test59(i8* %foo) { +; CHECK-LABEL: @test59( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, i8* [[FOO:%.*]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8* [[GEP1]] to i32 +; CHECK-NEXT: [[USE:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]]) +; CHECK-NEXT: ret i1 true +; + %bit = bitcast i8* %foo to i32* + %gep1 = getelementptr inbounds i32, i32* %bit, i64 2 + %gep2 = getelementptr inbounds i8, i8* %foo, i64 10 + %cast1 = bitcast i32* %gep1 to i8* + %cmp = icmp ult i8* %cast1, %gep2 + %use = ptrtoint i8* %cast1 to i64 + %call = call i32 @test58_d(i64 %use) + ret i1 %cmp +} + +define i1 @test59_as1(i8 addrspace(1)* %foo) { +; CHECK-LABEL: @test59_as1( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[FOO:%.*]], i16 8 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[GEP1]] to i16 +; CHECK-NEXT: [[USE:%.*]] = zext i16 [[TMP1]] to i64 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]]) +; CHECK-NEXT: ret i1 true +; + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i64 2 + %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i64 10 + %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + %use = ptrtoint i8 addrspace(1)* %cast1 to i64 + %call = call i32 @test58_d(i64 %use) + ret i1 %cmp +} + +define i1 @test60(i8* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test60( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[J:%.*]] to i32 +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[GEP1_IDX]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %bit = bitcast i8* %foo to i32* + %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i + %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j + %cast1 = bitcast i32* %gep1 to i8* + %cmp = icmp ult i8* %cast1, %gep2 + ret i1 %cmp +} + +define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) { +; 
CHECK-LABEL: @test60_as1( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[J:%.*]] to i16 +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i64 %i + %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i64 %j + %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + ret i1 %cmp +} + +; Same as test60, but look through an addrspacecast instead of a +; bitcast. This uses the same sized addrspace. +define i1 @test60_addrspacecast(i8* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test60_addrspacecast( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[J:%.*]] to i32 +; CHECK-NEXT: [[I_TR:%.*]] = trunc i64 [[I:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[I_TR]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %bit = addrspacecast i8* %foo to i32 addrspace(3)* + %gep1 = getelementptr inbounds i32, i32 addrspace(3)* %bit, i64 %i + %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j + %cast1 = addrspacecast i32 addrspace(3)* %gep1 to i8* + %cmp = icmp ult i8* %cast1, %gep2 + ret i1 %cmp +} + +define i1 @test60_addrspacecast_smaller(i8* %foo, i16 %i, i64 %j) { +; CHECK-LABEL: @test60_addrspacecast_smaller( +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[I:%.*]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[J:%.*]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %bit = addrspacecast i8* %foo to i32 addrspace(1)* + %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i + %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j + %cast1 = addrspacecast i32 addrspace(1)* %gep1 to i8* + %cmp = icmp ult i8* %cast1, %gep2 + ret i1 %cmp +} + +define i1 @test60_addrspacecast_larger(i8 addrspace(1)* %foo, i32 %i, i16 %j) { +; CHECK-LABEL: @test60_addrspacecast_larger( +; CHECK-NEXT: [[I_TR:%.*]] = trunc i32 [[I:%.*]] to i16 +; CHECK-NEXT: [[TMP1:%.*]] = shl i16 [[I_TR]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[TMP1]], [[J:%.*]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %bit = addrspacecast i8 addrspace(1)* %foo to i32 addrspace(2)* + %gep1 = getelementptr inbounds i32, i32 addrspace(2)* %bit, i32 %i + %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i16 %j + %cast1 = addrspacecast i32 addrspace(2)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + ret i1 %cmp +} + +define i1 @test61(i8* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test61( +; CHECK-NEXT: [[BIT:%.*]] = bitcast i8* [[FOO:%.*]] to i32* +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i32 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32* [[BIT]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[J:%.*]] to i32 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8* [[FOO]], i32 [[TMP2]] +; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32* [[GEP1]] to i8* +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[GEP2]], [[CAST1]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %bit = bitcast i8* %foo to i32* + %gep1 = getelementptr i32, i32* %bit, i64 %i + %gep2 = getelementptr i8, i8* %foo, i64 %j + %cast1 = bitcast i32* %gep1 to i8* + %cmp = icmp ult i8* %cast1, %gep2 + ret i1 %cmp +; Don't transform non-inbounds GEPs. 
+} + +define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { +; CHECK-LABEL: @test61_as1( +; CHECK-NEXT: [[BIT:%.*]] = bitcast i8 addrspace(1)* [[FOO:%.*]] to i32 addrspace(1)* +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32 addrspace(1)* [[BIT]], i16 [[I:%.*]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8 addrspace(1)* [[FOO]], i16 [[J:%.*]] +; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 addrspace(1)* [[GEP1]] to i8 addrspace(1)* +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 addrspace(1)* [[GEP2]], [[CAST1]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr i32, i32 addrspace(1)* %bit, i16 %i + %gep2 = getelementptr i8, i8 addrspace(1)* %foo, i16 %j + %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + ret i1 %cmp +; Don't transform non-inbounds GEPs. +} + +define i1 @test62(i8* %a) { +; CHECK-LABEL: @test62( +; CHECK-NEXT: ret i1 true +; + %arrayidx1 = getelementptr inbounds i8, i8* %a, i64 1 + %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 10 + %cmp = icmp slt i8* %arrayidx1, %arrayidx2 + ret i1 %cmp +} + +define i1 @test62_as1(i8 addrspace(1)* %a) { +; CHECK-LABEL: @test62_as1( +; CHECK-NEXT: ret i1 true +; + %arrayidx1 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 1 + %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 10 + %cmp = icmp slt i8 addrspace(1)* %arrayidx1, %arrayidx2 + ret i1 %cmp +} + + +; Variation of the above with an ashr +define i1 @icmp_and_ashr_multiuse(i32 %X) { +; CHECK-LABEL: @icmp_and_ashr_multiuse( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 240 +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[X]], 496 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 224 +; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i32 [[AND2]], 432 +; CHECK-NEXT: [[AND3:%.*]] = and i1 [[TOBOOL]], [[TOBOOL2]] +; CHECK-NEXT: ret i1 [[AND3]] +; + %shr = ashr i32 %X, 4 + %and = and i32 %shr, 15 + %and2 = and i32 %shr, 31 ; second use of the shift + %tobool = icmp ne i32 %and, 14 + %tobool2 = icmp ne i32 %and2, 27 + %and3 = and i1 %tobool, %tobool2 + ret i1 %and3 +} + +define i1 @icmp_lshr_and_overshift(i8 %X) { +; CHECK-LABEL: @icmp_lshr_and_overshift( +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ugt i8 [[X:%.*]], 31 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; + %shr = lshr i8 %X, 5 + %and = and i8 %shr, 15 + %tobool = icmp ne i8 %and, 0 + ret i1 %tobool +} + +; We shouldn't simplify this because the and uses bits that are shifted in. 
+define i1 @icmp_ashr_and_overshift(i8 %X) { +; CHECK-LABEL: @icmp_ashr_and_overshift( +; CHECK-NEXT: [[SHR:%.*]] = ashr i8 [[X:%.*]], 5 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[SHR]], 15 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[AND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; + %shr = ashr i8 %X, 5 + %and = and i8 %shr, 15 + %tobool = icmp ne i8 %and, 0 + ret i1 %tobool +} + +; PR16244 +define i1 @test71(i8* %x) { +; CHECK-LABEL: @test71( +; CHECK-NEXT: ret i1 false +; + %a = getelementptr i8, i8* %x, i64 8 + %b = getelementptr inbounds i8, i8* %x, i64 8 + %c = icmp ugt i8* %a, %b + ret i1 %c +} + +define i1 @test71_as1(i8 addrspace(1)* %x) { +; CHECK-LABEL: @test71_as1( +; CHECK-NEXT: ret i1 false +; + %a = getelementptr i8, i8 addrspace(1)* %x, i64 8 + %b = getelementptr inbounds i8, i8 addrspace(1)* %x, i64 8 + %c = icmp ugt i8 addrspace(1)* %a, %b + ret i1 %c +} + Index: ../test/Transforms/LoopIdiom/struct-custom-dl.ll =================================================================== --- ../test/Transforms/LoopIdiom/struct-custom-dl.ll +++ ../test/Transforms/LoopIdiom/struct-custom-dl.ll @@ -0,0 +1,212 @@ +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-p:40:64:64:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +%struct.foo = type { i32, i32 } +%struct.foo1 = type { i32, i32, i32 } +%struct.foo2 = type { i32, i16, i16 } + +;void bar1(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 0; +; } +;} +define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar1( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} + +;void bar2(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].b = 0; +; f[i].a = 0; +; } +;} +define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar2( +; CHECK: call void @llvm.memset +; 
CHECK-NOT: store +} + +;void bar3(foo_t *f, unsigned n) { +; for (unsigned i = n; i > 0; --i) { +; f[i].a = 0; +; f[i].b = 0; +; } +;} +define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ %n, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %dec = add i32 %indvars.iv, -1 + %cmp = icmp eq i32 %dec, 0 + %indvars.iv.next = add nsw i32 %indvars.iv, -1 + br i1 %cmp, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar3( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} + +;void bar4(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 1; +; } +;} +define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 %indvars.iv, i32 1 + store i32 1, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar4( +; CHECK-NOT: call void @llvm.memset +} + +;void bar5(foo1_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 0; +; } +;} +define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i32 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar5( +; CHECK-NOT: call void @llvm.memset +} + +;void bar6(foo2_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 0; +; f[i].c = 0; +; } +;} +define void @bar6(%struct.foo2* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + 
+for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i32 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i32 %indvars.iv, i32 1 + store i16 0, i16* %b, align 4 + %c = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i32 %indvars.iv, i32 2 + store i16 0, i16* %c, align 2 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar6( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} Index: ../test/Transforms/LoopIdiom/unroll-custom-dl.ll =================================================================== --- ../test/Transforms/LoopIdiom/unroll-custom-dl.ll +++ ../test/Transforms/LoopIdiom/unroll-custom-dl.ll @@ -0,0 +1,78 @@ +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-p:64:64:64:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 + +target triple = "x86_64-apple-darwin10.0.0" + +;void test(int *f, unsigned n) { +; for (unsigned i = 0; i < 2 * n; i += 2) { +; f[i] = 0; +; f[i+1] = 0; +; } +;} +define void @test(i32* %f, i32 %n) nounwind ssp { +entry: + %0 = shl i32 %n, 1 + %cmp1 = icmp eq i32 %0, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %f, i32 %indvars.iv + store i32 0, i32* %arrayidx, align 4 + %1 = or i32 %indvars.iv, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %f, i32 %1 + store i32 0, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 2 + %cmp = icmp ult i32 %indvars.iv.next, %0 + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @test( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} + +;void test_pattern(int *f, unsigned n) { +; for (unsigned i = 0; i < 2 * n; i += 2) { +; f[i] = 2; +; f[i+1] = 2; +; } +;} +define void @test_pattern(i32* %f, i32 %n) nounwind ssp { +entry: + %mul = shl i32 %n, 1 + %cmp1 = icmp eq i32 %mul, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %f, i32 %indvars.iv + store i32 2, i32* %arrayidx, align 4 + %x1 = or i32 %indvars.iv, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %f, i32 %x1 + store i32 2, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 2 + %cmp = icmp ult i32 %indvars.iv.next, %mul + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; 
CHECK-LABEL: @test_pattern( +; CHECK: call void @memset_pattern16 +; CHECK-NOT: store +} Index: ../test/Transforms/PhaseOrdering/scev-custom-dl.ll =================================================================== --- ../test/Transforms/PhaseOrdering/scev-custom-dl.ll +++ ../test/Transforms/PhaseOrdering/scev-custom-dl.ll @@ -0,0 +1,67 @@ +; RUN: opt -O3 -S -analyze -scalar-evolution < %s | FileCheck %s + +target datalayout = "e-m:m-p:40:64:64:32-i32:32-i16:16-i8:8-n32" + +; +; This file contains phase ordering tests for scalar evolution. +; Test that the standard passes don't obfuscate the IR so scalar evolution can't +; recognize expressions. + +; CHECK: test1 +; The loop body contains two increments by %div. +; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d. +; CHECK: --> {%p,+,(8 * (%d /u 4))} +define void @test1(i32 %d, i32* %p) nounwind uwtable ssp { +entry: + %div = udiv i32 %d, 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp ne i32 %i.0, 64 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %p.addr.0, align 4 + %add.ptr = getelementptr inbounds i32, i32* %p.addr.0, i32 %div + store i32 1, i32* %add.ptr, align 4 + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %div + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; CHECK: test1a +; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2) +; CHECK: --> {%p,+,(8 * (%d /u 2))} +define void @test1a(i32 %d, i32* %p) nounwind uwtable ssp { +entry: + %div = udiv i32 %d, 2 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp ne i32 %i.0, 64 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %p.addr.0, align 4 + %add.ptr = getelementptr inbounds i32, i32* %p.addr.0, i32 %div + store i32 1, i32* %add.ptr, align 4 + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %div + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: ../test/Transforms/SimplifyCFG/switch_create-custom-dl.ll =================================================================== --- ../test/Transforms/SimplifyCFG/switch_create-custom-dl.ll +++ ../test/Transforms/SimplifyCFG/switch_create-custom-dl.ll @@ -0,0 +1,660 @@ +; RUN: opt -S -simplifycfg < %s | FileCheck %s +target datalayout="p:40:64:64:32" + +declare void @foo1() + +declare void @foo2() + +define void @test1(i32 %V) { + %C1 = icmp eq i32 %V, 4 ; [#uses=1] + %C2 = icmp eq i32 %V, 17 ; [#uses=1] + %CN = or i1 %C1, %C2 ; [#uses=1] + br i1 %CN, label %T, label %F +T: ; preds = %0 + call void @foo1( ) + ret void +F: ; preds = %0 + call void @foo2( ) + ret void +; CHECK-LABEL: @test1( +; CHECK: switch i32 %V, label %F [ +; CHECK: i32 17, label %T +; CHECK: i32 4, label %T +; CHECK: ] +} + +define void @test1_ptr(i32* %V) { + %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*) + %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*) + %CN = or i1 %C1, %C2 ; [#uses=1] + br i1 %CN, label %T, label %F +T: ; preds = %0 + call void @foo1( ) + ret void +F: ; preds = %0 + call void @foo2( ) + ret void +; 
CHECK-LABEL: @test1_ptr( +; DL: %magicptr = ptrtoint i32* %V to i32 +; DL: switch i32 %magicptr, label %F [ +; DL: i32 17, label %T +; DL: i32 4, label %T +; DL: ] +} + +define void @test1_ptr_as1(i32 addrspace(1)* %V) { + %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*) + %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*) + %CN = or i1 %C1, %C2 ; [#uses=1] + br i1 %CN, label %T, label %F +T: ; preds = %0 + call void @foo1( ) + ret void +F: ; preds = %0 + call void @foo2( ) + ret void +; CHECK-LABEL: @test1_ptr_as1( +; DL: %magicptr = ptrtoint i32 addrspace(1)* %V to i16 +; DL: switch i16 %magicptr, label %F [ +; DL: i16 17, label %T +; DL: i16 4, label %T +; DL: ] +} + +define void @test2(i32 %V) { + %C1 = icmp ne i32 %V, 4 ; [#uses=1] + %C2 = icmp ne i32 %V, 17 ; [#uses=1] + %CN = and i1 %C1, %C2 ; [#uses=1] + br i1 %CN, label %T, label %F +T: ; preds = %0 + call void @foo1( ) + ret void +F: ; preds = %0 + call void @foo2( ) + ret void +; CHECK-LABEL: @test2( +; CHECK: switch i32 %V, label %T [ +; CHECK: i32 17, label %F +; CHECK: i32 4, label %F +; CHECK: ] +} + +define void @test3(i32 %V) { + %C1 = icmp eq i32 %V, 4 ; [#uses=1] + br i1 %C1, label %T, label %N +N: ; preds = %0 + %C2 = icmp eq i32 %V, 17 ; [#uses=1] + br i1 %C2, label %T, label %F +T: ; preds = %N, %0 + call void @foo1( ) + ret void +F: ; preds = %N + call void @foo2( ) + ret void + +; CHECK-LABEL: @test3( +; CHECK: switch i32 %V, label %F [ +; CHECK: i32 4, label %T +; CHECK: i32 17, label %T +; CHECK: ] +} + + + +define i32 @test4(i8 zeroext %c) nounwind ssp noredzone { +entry: + %cmp = icmp eq i8 %c, 62 + br i1 %cmp, label %lor.end, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %cmp4 = icmp eq i8 %c, 34 + br i1 %cmp4, label %lor.end, label %lor.rhs + +lor.rhs: ; preds = %lor.lhs.false + %cmp8 = icmp eq i8 %c, 92 + br label %lor.end + +lor.end: ; preds = %lor.rhs, %lor.lhs.false, %entry + %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp8, %lor.rhs ] + %lor.ext = zext i1 %0 to i32 + ret i32 %lor.ext + +; CHECK-LABEL: @test4( +; CHECK: switch i8 %c, label %lor.rhs [ +; CHECK: i8 62, label %lor.end +; CHECK: i8 34, label %lor.end +; CHECK: i8 92, label %lor.end +; CHECK: ] +} + +define i32 @test5(i8 zeroext %c) nounwind ssp noredzone { +entry: + switch i8 %c, label %lor.rhs [ + i8 62, label %lor.end + i8 34, label %lor.end + i8 92, label %lor.end + ] + +lor.rhs: ; preds = %entry + %V = icmp eq i8 %c, 92 + br label %lor.end + +lor.end: ; preds = %entry, %entry, %entry, %lor.rhs + %0 = phi i1 [ true, %entry ], [ %V, %lor.rhs ], [ true, %entry ], [ true, %entry ] + %lor.ext = zext i1 %0 to i32 + ret i32 %lor.ext +; CHECK-LABEL: @test5( +; CHECK: switch i8 %c, label %lor.rhs [ +; CHECK: i8 62, label %lor.end +; CHECK: i8 34, label %lor.end +; CHECK: i8 92, label %lor.end +; CHECK: ] +} + + +define i1 @test6({ i32, i32 }* %I) { +entry: + %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1 ; [#uses=1] + %tmp.2.i = load i32, i32* %tmp.1.i ; [#uses=6] + %tmp.2 = icmp eq i32 %tmp.2.i, 14 ; [#uses=1] + br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0 +shortcirc_next.0: ; preds = %entry + %tmp.6 = icmp eq i32 %tmp.2.i, 15 ; [#uses=1] + br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1 +shortcirc_next.1: ; preds = %shortcirc_next.0 + %tmp.11 = icmp eq i32 %tmp.2.i, 16 ; [#uses=1] + br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2 +shortcirc_next.2: ; preds = %shortcirc_next.1 + %tmp.16 = icmp eq i32 
%tmp.2.i, 17 ; [#uses=1] + br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3 +shortcirc_next.3: ; preds = %shortcirc_next.2 + %tmp.21 = icmp eq i32 %tmp.2.i, 18 ; [#uses=1] + br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4 +shortcirc_next.4: ; preds = %shortcirc_next.3 + %tmp.26 = icmp eq i32 %tmp.2.i, 19 ; [#uses=1] + br label %UnifiedReturnBlock +shortcirc_done.4: ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry + br label %UnifiedReturnBlock +UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4 + %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ] ; [#uses=1] + ret i1 %UnifiedRetVal + +; CHECK-LABEL: @test6( +; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14 +; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6 +} + +define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone { +entry: + %cmp = icmp ult i32 %x, 32 + %cmp4 = icmp eq i8 %c, 97 + %or.cond = or i1 %cmp, %cmp4 + %cmp9 = icmp eq i8 %c, 99 + %or.cond11 = or i1 %or.cond, %cmp9 + br i1 %or.cond11, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo1() nounwind noredzone + ret void + +if.end: ; preds = %entry + ret void + +; CHECK-LABEL: @test7( +; CHECK: %cmp = icmp ult i32 %x, 32 +; CHECK: br i1 %cmp, label %if.then, label %switch.early.test +; CHECK: switch.early.test: +; CHECK: switch i8 %c, label %if.end [ +; CHECK: i8 99, label %if.then +; CHECK: i8 97, label %if.then +; CHECK: ] +} + +define i32 @test8(i8 zeroext %c, i32 %x, i1 %C) nounwind ssp noredzone { +entry: + br i1 %C, label %N, label %if.then +N: + %cmp = icmp ult i32 %x, 32 + %cmp4 = icmp eq i8 %c, 97 + %or.cond = or i1 %cmp, %cmp4 + %cmp9 = icmp eq i8 %c, 99 + %or.cond11 = or i1 %or.cond, %cmp9 + br i1 %or.cond11, label %if.then, label %if.end + +if.then: ; preds = %entry + %A = phi i32 [0, %entry], [42, %N] + tail call void @foo1() nounwind noredzone + ret i32 %A + +if.end: ; preds = %entry + ret i32 0 + +; CHECK-LABEL: @test8( +; CHECK: switch.early.test: +; CHECK: switch i8 %c, label %if.end [ +; CHECK: i8 99, label %if.then +; CHECK: i8 97, label %if.then +; CHECK: ] +; CHECK: %A = phi i32 [ 0, %entry ], [ 42, %switch.early.test ], [ 42, %N ], [ 42, %switch.early.test ] +} + +;; This is "Example 7" from http://blog.regehr.org/archives/320 +define i32 @test9(i8 zeroext %c) nounwind ssp noredzone { +entry: + %cmp = icmp ult i8 %c, 33 + br i1 %cmp, label %lor.end, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %cmp4 = icmp eq i8 %c, 46 + br i1 %cmp4, label %lor.end, label %lor.lhs.false6 + +lor.lhs.false6: ; preds = %lor.lhs.false + %cmp9 = icmp eq i8 %c, 44 + br i1 %cmp9, label %lor.end, label %lor.lhs.false11 + +lor.lhs.false11: ; preds = %lor.lhs.false6 + %cmp14 = icmp eq i8 %c, 58 + br i1 %cmp14, label %lor.end, label %lor.lhs.false16 + +lor.lhs.false16: ; preds = %lor.lhs.false11 + %cmp19 = icmp eq i8 %c, 59 + br i1 %cmp19, label %lor.end, label %lor.lhs.false21 + +lor.lhs.false21: ; preds = %lor.lhs.false16 + %cmp24 = icmp eq i8 %c, 60 + br i1 %cmp24, label %lor.end, label %lor.lhs.false26 + +lor.lhs.false26: ; preds = %lor.lhs.false21 + %cmp29 = icmp eq i8 %c, 62 + br i1 %cmp29, label %lor.end, label %lor.lhs.false31 + +lor.lhs.false31: ; preds = %lor.lhs.false26 + %cmp34 = icmp eq i8 %c, 34 + br i1 %cmp34, label %lor.end, label %lor.lhs.false36 + +lor.lhs.false36: ; preds = %lor.lhs.false31 + %cmp39 = icmp eq i8 %c, 92 + br i1 %cmp39, label %lor.end, label %lor.rhs + +lor.rhs: ; preds = 
%lor.lhs.false36 + %cmp43 = icmp eq i8 %c, 39 + br label %lor.end + +lor.end: ; preds = %lor.rhs, %lor.lhs.false36, %lor.lhs.false31, %lor.lhs.false26, %lor.lhs.false21, %lor.lhs.false16, %lor.lhs.false11, %lor.lhs.false6, %lor.lhs.false, %entry + %0 = phi i1 [ true, %lor.lhs.false36 ], [ true, %lor.lhs.false31 ], [ true, %lor.lhs.false26 ], [ true, %lor.lhs.false21 ], [ true, %lor.lhs.false16 ], [ true, %lor.lhs.false11 ], [ true, %lor.lhs.false6 ], [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp43, %lor.rhs ] + %conv46 = zext i1 %0 to i32 + ret i32 %conv46 + +; CHECK-LABEL: @test9( +; CHECK: %cmp = icmp ult i8 %c, 33 +; CHECK: br i1 %cmp, label %lor.end, label %switch.early.test + +; CHECK: switch.early.test: +; CHECK: switch i8 %c, label %lor.rhs [ +; CHECK: i8 92, label %lor.end +; CHECK: i8 62, label %lor.end +; CHECK: i8 60, label %lor.end +; CHECK: i8 59, label %lor.end +; CHECK: i8 58, label %lor.end +; CHECK: i8 46, label %lor.end +; CHECK: i8 44, label %lor.end +; CHECK: i8 34, label %lor.end +; CHECK: i8 39, label %lor.end +; CHECK: ] +} + +define i32 @test10(i32 %mode, i1 %Cond) { + %A = icmp ne i32 %mode, 0 + %B = icmp ne i32 %mode, 51 + %C = and i1 %A, %B + %D = and i1 %C, %Cond + br i1 %D, label %T, label %F +T: + ret i32 123 +F: + ret i32 324 + +; CHECK-LABEL: @test10( +; CHECK: br i1 %Cond, label %switch.early.test, label %F +; CHECK:switch.early.test: +; CHECK: switch i32 %mode, label %T [ +; CHECK: i32 51, label %F +; CHECK: i32 0, label %F +; CHECK: ] +} + +; PR8780 +define i32 @test11(i32 %bar) nounwind { +entry: + %cmp = icmp eq i32 %bar, 4 + %cmp2 = icmp eq i32 %bar, 35 + %or.cond = or i1 %cmp, %cmp2 + %cmp5 = icmp eq i32 %bar, 53 + %or.cond1 = or i1 %or.cond, %cmp5 + %cmp8 = icmp eq i32 %bar, 24 + %or.cond2 = or i1 %or.cond1, %cmp8 + %cmp11 = icmp eq i32 %bar, 23 + %or.cond3 = or i1 %or.cond2, %cmp11 + %cmp14 = icmp eq i32 %bar, 55 + %or.cond4 = or i1 %or.cond3, %cmp14 + %cmp17 = icmp eq i32 %bar, 12 + %or.cond5 = or i1 %or.cond4, %cmp17 + %cmp20 = icmp eq i32 %bar, 35 + %or.cond6 = or i1 %or.cond5, %cmp20 + br i1 %or.cond6, label %if.then, label %if.end + +if.then: ; preds = %entry + br label %return + +if.end: ; preds = %entry + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ 1, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 + +; CHECK-LABEL: @test11( +; CHECK: switch i32 %bar, label %if.end [ +; CHECK: i32 55, label %return +; CHECK: i32 53, label %return +; CHECK: i32 35, label %return +; CHECK: i32 24, label %return +; CHECK: i32 23, label %return +; CHECK: i32 12, label %return +; CHECK: i32 4, label %return +; CHECK: ] +} + +define void @test12() nounwind { +entry: + br label %bb49.us.us + +bb49.us.us: + %A = icmp eq i32 undef, undef + br i1 %A, label %bb55.us.us, label %malformed + +bb48.us.us: + %B = icmp ugt i32 undef, undef + br i1 %B, label %bb55.us.us, label %bb49.us.us + +bb55.us.us: + br label %bb48.us.us + +malformed: + ret void +; CHECK-LABEL: @test12( + +} + +; test13 - handle switch formation with ult. 
+define void @test13(i32 %x) nounwind ssp noredzone { +entry: + %cmp = icmp ult i32 %x, 2 + br i1 %cmp, label %if.then, label %lor.lhs.false3 + +lor.lhs.false3: ; preds = %lor.lhs.false + %cmp5 = icmp eq i32 %x, 3 + br i1 %cmp5, label %if.then, label %lor.lhs.false6 + +lor.lhs.false6: ; preds = %lor.lhs.false3 + %cmp8 = icmp eq i32 %x, 4 + br i1 %cmp8, label %if.then, label %lor.lhs.false9 + +lor.lhs.false9: ; preds = %lor.lhs.false6 + %cmp11 = icmp eq i32 %x, 6 + br i1 %cmp11, label %if.then, label %if.end + +if.then: ; preds = %lor.lhs.false9, %lor.lhs.false6, %lor.lhs.false3, %lor.lhs.false, %entry + call void @foo1() noredzone + br label %if.end + +if.end: ; preds = %if.then, %lor.lhs.false9 + ret void +; CHECK-LABEL: @test13( +; CHECK: switch i32 %x, label %if.end [ +; CHECK: i32 6, label %if.then +; CHECK: i32 4, label %if.then +; CHECK: i32 3, label %if.then +; CHECK: i32 1, label %if.then +; CHECK: i32 0, label %if.then +; CHECK: ] +} + +; test14 - handle switch formation with ult. +define void @test14(i32 %x) nounwind ssp noredzone { +entry: + %cmp = icmp ugt i32 %x, 2 + br i1 %cmp, label %lor.lhs.false3, label %if.then + +lor.lhs.false3: ; preds = %lor.lhs.false + %cmp5 = icmp ne i32 %x, 3 + br i1 %cmp5, label %lor.lhs.false6, label %if.then + +lor.lhs.false6: ; preds = %lor.lhs.false3 + %cmp8 = icmp ne i32 %x, 4 + br i1 %cmp8, label %lor.lhs.false9, label %if.then + +lor.lhs.false9: ; preds = %lor.lhs.false6 + %cmp11 = icmp ne i32 %x, 6 + br i1 %cmp11, label %if.end, label %if.then + +if.then: ; preds = %lor.lhs.false9, %lor.lhs.false6, %lor.lhs.false3, %lor.lhs.false, %entry + call void @foo1() noredzone + br label %if.end + +if.end: ; preds = %if.then, %lor.lhs.false9 + ret void +; CHECK-LABEL: @test14( +; CHECK: switch i32 %x, label %if.end [ +; CHECK: i32 6, label %if.then +; CHECK: i32 4, label %if.then +; CHECK: i32 3, label %if.then +; CHECK: i32 1, label %if.then +; CHECK: i32 0, label %if.then +; CHECK: ] +} + +; Don't crash on ginormous ranges. +define void @test15(i128 %x) nounwind { + %cmp = icmp ugt i128 %x, 2 + br i1 %cmp, label %if.end, label %lor.false + +lor.false: + %cmp2 = icmp ne i128 %x, 100000000000000000000 + br i1 %cmp2, label %if.end, label %if.then + +if.then: + call void @foo1() noredzone + br label %if.end + +if.end: + ret void + +; CHECK-LABEL: @test15( +; CHECK-NOT: switch +; CHECK: ret void +} + +; PR8675 +; rdar://5134905 +define zeroext i1 @test16(i32 %x) nounwind { +entry: +; CHECK-LABEL: @test16( +; CHECK: %x.off = add i32 %x, -1 +; CHECK: %switch = icmp ult i32 %x.off, 3 + %cmp.i = icmp eq i32 %x, 1 + br i1 %cmp.i, label %lor.end, label %lor.lhs.false + +lor.lhs.false: + %cmp.i2 = icmp eq i32 %x, 2 + br i1 %cmp.i2, label %lor.end, label %lor.rhs + +lor.rhs: + %cmp.i1 = icmp eq i32 %x, 3 + br label %lor.end + +lor.end: + %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp.i1, %lor.rhs ] + ret i1 %0 +} + +; Check that we don't turn an icmp into a switch where it's not useful. 
+define void @test17(i32 %x, i32 %y) { + %cmp = icmp ult i32 %x, 3 + %switch = icmp ult i32 %y, 2 + %or.cond775 = or i1 %cmp, %switch + br i1 %or.cond775, label %lor.lhs.false8, label %return + +lor.lhs.false8: + tail call void @foo1() + ret void + +return: + ret void + +; CHECK-LABEL: @test17( +; CHECK-NOT: switch.early.test +; CHECK-NOT: switch i32 +; CHECK: ret void +} + +define void @test18(i32 %arg) { +bb: + %tmp = and i32 %arg, -2 + %tmp1 = icmp eq i32 %tmp, 8 + %tmp2 = icmp eq i32 %arg, 10 + %tmp3 = or i1 %tmp1, %tmp2 + %tmp4 = icmp eq i32 %arg, 11 + %tmp5 = or i1 %tmp3, %tmp4 + %tmp6 = icmp eq i32 %arg, 12 + %tmp7 = or i1 %tmp5, %tmp6 + br i1 %tmp7, label %bb19, label %bb8 + +bb8: ; preds = %bb + %tmp9 = add i32 %arg, -13 + %tmp10 = icmp ult i32 %tmp9, 2 + %tmp11 = icmp eq i32 %arg, 16 + %tmp12 = or i1 %tmp10, %tmp11 + %tmp13 = icmp eq i32 %arg, 17 + %tmp14 = or i1 %tmp12, %tmp13 + %tmp15 = icmp eq i32 %arg, 18 + %tmp16 = or i1 %tmp14, %tmp15 + %tmp17 = icmp eq i32 %arg, 15 + %tmp18 = or i1 %tmp16, %tmp17 + br i1 %tmp18, label %bb19, label %bb20 + +bb19: ; preds = %bb8, %bb + tail call void @foo1() + br label %bb20 + +bb20: ; preds = %bb19, %bb8 + ret void + +; CHECK-LABEL: @test18( +; CHECK: %arg.off = add i32 %arg, -8 +; CHECK: icmp ult i32 %arg.off, 11 +} + +define void @PR26323(i1 %tobool23, i32 %tmp3) { +entry: + %tobool5 = icmp ne i32 %tmp3, 0 + %neg14 = and i32 %tmp3, -2 + %cmp17 = icmp ne i32 %neg14, -1 + %or.cond = and i1 %tobool5, %tobool23 + %or.cond1 = and i1 %cmp17, %or.cond + br i1 %or.cond1, label %if.end29, label %if.then27 + +if.then27: ; preds = %entry + call void @foo1() + unreachable + +if.end29: ; preds = %entry + ret void +} + +; CHECK-LABEL: define void @PR26323( +; CHECK: %tobool5 = icmp ne i32 %tmp3, 0 +; CHECK: %neg14 = and i32 %tmp3, -2 +; CHECK: %cmp17 = icmp ne i32 %neg14, -1 +; CHECK: %or.cond = and i1 %tobool5, %tobool23 +; CHECK: %or.cond1 = and i1 %cmp17, %or.cond +; CHECK: br i1 %or.cond1, label %if.end29, label %if.then27 + +; Form a switch when and'ing a negated power of two +; CHECK-LABEL: define void @test19 +; CHECK: switch i32 %arg, label %else [ +; CHECK: i32 32, label %if +; CHECK: i32 13, label %if +; CHECK: i32 12, label %if +define void @test19(i32 %arg) { + %and = and i32 %arg, -2 + %cmp1 = icmp eq i32 %and, 12 + %cmp2 = icmp eq i32 %arg, 32 + %pred = or i1 %cmp1, %cmp2 + br i1 %pred, label %if, label %else + +if: + call void @foo1() + ret void + +else: + ret void +} + +; Since %cmp1 is always false, a switch is never formed +; CHECK-LABEL: define void @test20 +; CHECK-NOT: switch +; CHECK: ret void +define void @test20(i32 %arg) { + %and = and i32 %arg, -2 + %cmp1 = icmp eq i32 %and, 13 + %cmp2 = icmp eq i32 %arg, 32 + %pred = or i1 %cmp1, %cmp2 + br i1 %pred, label %if, label %else + +if: + call void @foo1() + ret void + +else: + ret void +} + +; Form a switch when or'ing a power of two +; CHECK-LABEL: define void @test21 +; CHECK: i32 32, label %else +; CHECK: i32 13, label %else +; CHECK: i32 12, label %else +define void @test21(i32 %arg) { + %and = or i32 %arg, 1 + %cmp1 = icmp ne i32 %and, 13 + %cmp2 = icmp ne i32 %arg, 32 + %pred = and i1 %cmp1, %cmp2 + br i1 %pred, label %if, label %else + +if: + call void @foo1() + ret void + +else: + ret void +} + +; Since %cmp1 is always false, a switch is never formed +; CHECK-LABEL: define void @test22 +; CHECK-NOT: switch +; CHECK: ret void +define void @test22(i32 %arg) { + %and = or i32 %arg, 1 + %cmp1 = icmp ne i32 %and, 12 + %cmp2 = icmp ne i32 %arg, 32 + %pred = and i1 %cmp1, %cmp2 + 
br i1 %pred, label %if, label %else + +if: + call void @foo1() + ret void + +else: + ret void +} \ No newline at end of file