Index: llvm/include/llvm/Analysis/MemoryLocation.h =================================================================== --- llvm/include/llvm/Analysis/MemoryLocation.h +++ llvm/include/llvm/Analysis/MemoryLocation.h @@ -78,6 +78,7 @@ uint64_t Value; + bool Scalable = false; // Hack to support implicit construction. This should disappear when the // public LocationSize ctor goes away. enum DirectConstruction { Direct }; @@ -99,6 +100,9 @@ constexpr LocationSize(uint64_t Raw) : Value(Raw > MaxValue ? AfterPointer : Raw) {} + constexpr LocationSize(uint64_t Raw, bool Scalable) + : Value(Raw > MaxValue ? AfterPointer : Raw), Scalable(Scalable) {} + static LocationSize precise(uint64_t Value) { return LocationSize(Value); } static LocationSize precise(TypeSize Value) { if (Value.isScalable()) @@ -106,6 +110,9 @@ return precise(Value.getFixedValue()); } + static LocationSize preciseScalable(TypeSize Value) { + return LocationSize(Value.getKnownMinValue(), Value.isScalable()); + } static LocationSize upperBound(uint64_t Value) { // You can't go lower than 0, so give a precise result. if (LLVM_UNLIKELY(Value == 0)) @@ -168,6 +175,7 @@ return (Value & ImpreciseBit) == 0; } + bool isScalable() const { return Scalable; } // Convenience method to check if this LocationSize's value is 0. bool isZero() const { return hasValue() && getValue() == 0; } Index: llvm/include/llvm/IR/DataLayout.h =================================================================== --- llvm/include/llvm/IR/DataLayout.h +++ llvm/include/llvm/IR/DataLayout.h @@ -493,6 +493,11 @@ return getTypeSizeInBits(Ty) == getTypeStoreSizeInBits(Ty); } + bool isScalable(Type *Ty) const { + TypeSize BaseSize = getTypeSizeInBits(Ty); + return BaseSize.isScalable(); + } + /// Returns the offset in bytes between successive objects of the /// specified type, including alignment padding. /// Index: llvm/lib/Analysis/AliasAnalysisEvaluator.cpp =================================================================== --- llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -129,10 +129,11 @@ // iterate over the worklist, and run the full (n^2)/2 disambiguations for (auto I1 = Pointers.begin(), E = Pointers.end(); I1 != E; ++I1) { - LocationSize Size1 = LocationSize::precise(DL.getTypeStoreSize(I1->second)); + LocationSize Size1 = + LocationSize::preciseScalable(DL.getTypeStoreSize(I1->second)); for (auto I2 = Pointers.begin(); I2 != I1; ++I2) { LocationSize Size2 = - LocationSize::precise(DL.getTypeStoreSize(I2->second)); + LocationSize::preciseScalable(DL.getTypeStoreSize(I2->second)); AliasResult AR = AA.alias(I1->first, Size1, I2->first, Size2); switch (AR) { case AliasResult::NoAlias: @@ -214,7 +215,7 @@ for (CallBase *Call : Calls) { for (const auto &Pointer : Pointers) { LocationSize Size = - LocationSize::precise(DL.getTypeStoreSize(Pointer.second)); + LocationSize::preciseScalable(DL.getTypeStoreSize(Pointer.second)); switch (AA.getModRefInfo(Call, Pointer.first, Size)) { case ModRefInfo::NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, Call, Pointer, Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -495,6 +495,10 @@ const Value *Base; // Total constant offset from base. APInt Offset; + // Indicate if the offset is scalable (both variable and constant) + bool ScalableOffset; + // Indicate if there is only one constant offset + APInt NumConstOffset; // Scaled variable (non-constant) indices. SmallVector VarIndices; // Are all operations inbounds GEPs or non-indexing operations? @@ -506,9 +510,9 @@ dbgs() << "\n"; } void print(raw_ostream &OS) const { - OS << "(DecomposedGEP Base=" << Base->getName() - << ", Offset=" << Offset - << ", VarIndices=["; + OS << "(DecomposedGEP Base=" << Base->getName() << ", Offset=" << Offset + << ", ScalableOffset=" << ScalableOffset + << ", NumConstOffset=" << NumConstOffset << ", VarIndices=["; for (size_t i = 0; i < VarIndices.size(); i++) { if (i != 0) OS << ", "; @@ -537,6 +541,8 @@ unsigned MaxIndexSize = DL.getMaxIndexSizeInBits(); DecomposedGEP Decomposed; Decomposed.Offset = APInt(MaxIndexSize, 0); + Decomposed.ScalableOffset = false; + Decomposed.NumConstOffset = APInt(MaxIndexSize, 0); do { // See if this is a bitcast or GEP. const Operator *Op = dyn_cast(V); @@ -599,8 +605,9 @@ // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. gep_type_iterator GTI = gep_type_begin(GEPOp); unsigned IndexSize = DL.getIndexSizeInBits(AS); - // Assume all GEP operands are constants until proven otherwise. bool GepHasConstantOffset = true; + Decomposed.ScalableOffset = isa(GTI.getIndexedType()); + for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end(); I != E; ++I, ++GTI) { const Value *Index = *I; @@ -614,30 +621,18 @@ Decomposed.Offset += DL.getStructLayout(STy)->getElementOffset(FieldNo); continue; } - + TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); // For an array/pointer, add the element offset, explicitly scaled. if (const ConstantInt *CIdx = dyn_cast(Index)) { + Decomposed.NumConstOffset += 1; if (CIdx->isZero()) continue; - // Don't attempt to analyze GEPs if the scalable index is not zero. - TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); - if (AllocTypeSize.isScalable()) { - Decomposed.Base = V; - return Decomposed; - } - - Decomposed.Offset += AllocTypeSize.getFixedValue() * + Decomposed.Offset += AllocTypeSize.getKnownMinValue() * CIdx->getValue().sextOrTrunc(MaxIndexSize); continue; } - TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); - if (AllocTypeSize.isScalable()) { - Decomposed.Base = V; - return Decomposed; - } - GepHasConstantOffset = false; // If the integer type is smaller than the index size, it is implicitly @@ -649,7 +644,7 @@ CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT); // Scale by the type size. - unsigned TypeSize = AllocTypeSize.getFixedValue(); + unsigned TypeSize = AllocTypeSize.getKnownMinValue(); LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds()); Decomposed.Offset += LE.Offset.sext(MaxIndexSize); APInt Scale = LE.Scale.sext(MaxIndexSize); @@ -1049,6 +1044,18 @@ if (DecompGEP1.Base == GEP1 && DecompGEP2.Base == V2) return AliasResult::MayAlias; + // If we compare 2 GEPs, one has Vscale quantity and one is not + // but the offset are both 0 and there's only one index, + // they will alias if the base address alias + if (((DecompGEP1.Offset == DecompGEP2.Offset) == 0) && + (DecompGEP1.ScalableOffset != DecompGEP2.ScalableOffset) && + (V1Size.hasValue() && V2Size.hasValue()) && + DecompGEP1.VarIndices.empty() && + ((DecompGEP1.NumConstOffset == DecompGEP2.NumConstOffset) == 1)) + return AAQI.AAR.alias(MemoryLocation::getBeforeOrAfter(DecompGEP1.Base), + MemoryLocation::getBeforeOrAfter(DecompGEP2.Base), + AAQI); + // Subtract the GEP2 pointer from the GEP1 pointer to find out their // symbolic difference. subtractDecomposedGEPs(DecompGEP1, DecompGEP2, AAQI); @@ -1070,7 +1077,14 @@ // For GEPs with identical offsets, we can preserve the size and AAInfo // when performing the alias check on the underlying objects. - if (DecompGEP1.Offset == 0 && DecompGEP1.VarIndices.empty()) + // The 2 GEPs must have equal scalable type + bool OffsetZeroCheck; + OffsetZeroCheck = + isa(V2) + ? (DecompGEP1.ScalableOffset == DecompGEP2.ScalableOffset) + : 1; + if (DecompGEP1.Offset == 0 && DecompGEP1.VarIndices.empty() && + OffsetZeroCheck) return AAQI.AAR.alias(MemoryLocation(DecompGEP1.Base, V1Size), MemoryLocation(DecompGEP2.Base, V2Size), AAQI); @@ -1087,6 +1101,11 @@ return BaseAlias; } + // If the two GEPs have differing ScalableOffset value, return MayAlias + if ((DecompGEP1.ScalableOffset != DecompGEP2.ScalableOffset) && + isa(V2)) + return AliasResult::MayAlias; + // If there is a constant difference between the pointers, but the difference // is less than the size of the associated memory object, then we know // that the objects are partially overlapping. If the difference is @@ -1100,6 +1119,9 @@ LocationSize VLeftSize = V2Size; LocationSize VRightSize = V1Size; const bool Swapped = Off.isNegative(); + const bool SameScalableLoc = + VLeftSize.isScalable() == VRightSize.isScalable(); + // const bool SameScalable = 1; if (Swapped) { // Swap if we have the situation where: @@ -1117,7 +1139,7 @@ return AliasResult::MayAlias; const uint64_t LSize = VLeftSize.getValue(); - if (Off.ult(LSize)) { + if (Off.ult(LSize) && SameScalableLoc) { // Conservatively drop processing if a phi was visited and/or offset is // too big. AliasResult AR = AliasResult::PartialAlias; @@ -1131,13 +1153,21 @@ } return AR; } - return AliasResult::NoAlias; + if (SameScalableLoc) + return AliasResult::NoAlias; } // We need to know both acess sizes for all the following heuristics. if (!V1Size.hasValue() || !V2Size.hasValue()) return AliasResult::MayAlias; + if (V1Size.isScalable() != V2Size.isScalable()) + return AliasResult::MayAlias; + + if ((DecompGEP1.ScalableOffset != DecompGEP2.ScalableOffset) && + isa(V2)) + return AliasResult::MayAlias; + APInt GCD; ConstantRange OffsetRange = ConstantRange(DecompGEP1.Offset); for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) { Index: llvm/lib/Analysis/MemoryLocation.cpp =================================================================== --- llvm/lib/Analysis/MemoryLocation.cpp +++ llvm/lib/Analysis/MemoryLocation.cpp @@ -27,8 +27,10 @@ OS << "mapEmpty"; else if (*this == mapTombstone()) OS << "mapTombstone"; - else if (isPrecise()) + else if (isPrecise() && !isScalable()) OS << "precise(" << getValue() << ')'; + else if (isPrecise() && isScalable()) + OS << "precise(vscale x " << getValue() << ')'; else OS << "upperBound(" << getValue() << ')'; } @@ -36,19 +38,31 @@ MemoryLocation MemoryLocation::get(const LoadInst *LI) { const auto &DL = LI->getModule()->getDataLayout(); - return MemoryLocation( - LI->getPointerOperand(), - LocationSize::precise(DL.getTypeStoreSize(LI->getType())), - LI->getAAMetadata()); + if (DL.isScalable(LI->getType())) + return MemoryLocation( + LI->getPointerOperand(), + LocationSize::preciseScalable(DL.getTypeStoreSize(LI->getType())), + LI->getAAMetadata()); + else + return MemoryLocation( + LI->getPointerOperand(), + LocationSize::precise(DL.getTypeStoreSize(LI->getType())), + LI->getAAMetadata()); } MemoryLocation MemoryLocation::get(const StoreInst *SI) { const auto &DL = SI->getModule()->getDataLayout(); - return MemoryLocation(SI->getPointerOperand(), - LocationSize::precise(DL.getTypeStoreSize( - SI->getValueOperand()->getType())), - SI->getAAMetadata()); + if (DL.isScalable(SI->getValueOperand()->getType())) { + return MemoryLocation(SI->getPointerOperand(), + LocationSize::preciseScalable(DL.getTypeStoreSize( + SI->getValueOperand()->getType())), + SI->getAAMetadata()); + } else + return MemoryLocation(SI->getPointerOperand(), + LocationSize::precise(DL.getTypeStoreSize( + SI->getValueOperand()->getType())), + SI->getAAMetadata()); } MemoryLocation MemoryLocation::get(const VAArgInst *VI) { Index: llvm/test/Analysis/BasicAA/vscale.ll =================================================================== --- llvm/test/Analysis/BasicAA/vscale.ll +++ llvm/test/Analysis/BasicAA/vscale.ll @@ -4,8 +4,8 @@ ; CHECK-LABEL: gep_alloca_const_offset_1 ; CHECK-DAG: MustAlias: * %alloc, * %gep1 -; CHECK-DAG: MayAlias: * %alloc, * %gep2 -; CHECK-DAG: MayAlias: * %gep1, * %gep2 +; CHECK-DAG: NoAlias: * %alloc, * %gep2 +; CHECK-DAG: NoAlias: * %gep1, * %gep2 define void @gep_alloca_const_offset_1() { %alloc = alloca %gep1 = getelementptr , ptr %alloc, i64 0 @@ -17,10 +17,9 @@ } ; CHECK-LABEL: gep_alloca_const_offset_2 -; CHECK-DAG: MayAlias: * %alloc, * %gep1 -; CHECK-DAG: MayAlias: * %alloc, * %gep2 -; TODO: AliasResult for gep1,gep2 can be improved as MustAlias -; CHECK-DAG: MayAlias: * %gep1, * %gep2 +; CHECK-DAG: NoAlias: * %alloc, * %gep1 +; CHECK-DAG: NoAlias: * %alloc, * %gep2 +; CHECK-DAG: MustAlias: * %gep1, * %gep2 define void @gep_alloca_const_offset_2() { %alloc = alloca %gep1 = getelementptr , ptr %alloc, i64 1 @@ -76,8 +75,7 @@ ; CHECK-LABEL: gep_same_base_const_offset ; CHECK-DAG: MayAlias: i32* %gep1, * %p ; CHECK-DAG: MayAlias: i32* %gep2, * %p -; TODO: AliasResult for gep1,gep2 can be improved as NoAlias -; CHECK-DAG: MayAlias: i32* %gep1, i32* %gep2 +; CHECK-DAG: NoAlias: i32* %gep1, i32* %gep2 define void @gep_same_base_const_offset(ptr %p) { %gep1 = getelementptr , ptr %p, i64 1, i64 0 %gep2 = getelementptr , ptr %p, i64 1, i64 1 @@ -101,8 +99,8 @@ } ; CHECK-LABEL: gep_different_base_const_offset -; CHECK-DAG: MayAlias: * %gep1, * %p1 -; CHECK-DAG: MayAlias: * %gep2, * %p2 +; CHECK-DAG: NoAlias: * %gep1, * %p1 +; CHECK-DAG: NoAlias: * %gep2, * %p2 ; CHECK-DAG: NoAlias: * %p1, * %p2 ; CHECK-DAG: NoAlias: * %gep1, * %p2 ; CHECK-DAG: NoAlias: * %gep2, * %p1 @@ -122,7 +120,7 @@ ; CHECK-LABEL: gep_bitcast_1 ; CHECK-DAG: MustAlias: i32* %p, * %p ; CHECK-DAG: MayAlias: i32* %gep1, * %p -; CHECK-DAG: MayAlias: i32* %gep1, i32* %p +; CHECK-DAG: NoAlias: i32* %gep1, i32* %p ; CHECK-DAG: MayAlias: i32* %gep2, * %p ; CHECK-DAG: MayAlias: i32* %gep1, i32* %gep2 ; CHECK-DAG: NoAlias: i32* %gep2, i32* %p @@ -141,7 +139,7 @@ ; CHECK-DAG: MayAlias: i32* %gep1, * %p ; CHECK-DAG: MayAlias: i32* %gep1, * %p ; CHECK-DAG: MayAlias: float* %gep2, * %p -; CHECK-DAG: MayAlias: i32* %gep1, float* %gep2 +; CHECK-DAG: MustAlias: i32* %gep1, float* %gep2 ; CHECK-DAG: MayAlias: float* %gep2, * %p define void @gep_bitcast_2(ptr %p) { %gep1 = getelementptr , ptr %p, i64 1, i64 0 @@ -174,8 +172,8 @@ ; CHECK-LABEL: gep_recursion_level_1_bitcast ; CHECK-DAG: MustAlias: i32* %a, * %a -; CHECK-DAG: MayAlias: i32* %a, i32* %gep -; CHECK-DAG: MayAlias: i32* %a, i32* %gep_rec_1 +; CHECK-DAG: NoAlias: i32* %a, i32* %gep +; CHECK-DAG: NoAlias: i32* %a, i32* %gep_rec_1 ; CHECK-DAG: MayAlias: * %a, i32* %gep ; CHECK-DAG: MayAlias: * %a, i32* %gep_rec_1 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_1 @@ -233,23 +231,24 @@ ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_3 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_4 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_5 -; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_6 +; CHECK-DAG: MayAlias: i32* %gep, i32* %gep_rec_6 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_2 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_3 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_4 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_5 -; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_6 +; CHECK-DAG: MayAlias: i32* %gep_rec_1, i32* %gep_rec_6 ; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_3 ; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_4 ; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_5 -; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_6 +; CHECK-DAG: MayAlias: i32* %gep_rec_2, i32* %gep_rec_6 ; CHECK-DAG: NoAlias: i32* %gep_rec_3, i32* %gep_rec_4 ; CHECK-DAG: NoAlias: i32* %gep_rec_3, i32* %gep_rec_5 -; CHECK-DAG: NoAlias: i32* %gep_rec_3, i32* %gep_rec_6 +; CHECK-DAG: MayAlias: i32* %gep_rec_3, i32* %gep_rec_6 ; CHECK-DAG: NoAlias: i32* %gep_rec_4, i32* %gep_rec_5 -; CHECK-DAG: NoAlias: i32* %gep_rec_4, i32* %gep_rec_6 -; CHECK-DAG: NoAlias: i32* %gep_rec_5, i32* %gep_rec_6 +; CHECK-DAG: MayAlias: i32* %gep_rec_4, i32* %gep_rec_6 +; CHECK-DAG: MayAlias: i32* %gep_rec_5, i32* %gep_rec_6 ; GEP max lookup depth was set to 6. +; The addition of special case when offset is 0 for scalable reduce the depth of analysis define void @gep_recursion_max_lookup_depth_reached(ptr %a, ptr %p) { %gep = getelementptr , ptr %p, i64 1, i64 2 %gep_rec_1 = getelementptr i32, ptr %gep, i64 1 Index: llvm/test/Transforms/GVN/vscale.ll =================================================================== --- llvm/test/Transforms/GVN/vscale.ll +++ llvm/test/Transforms/GVN/vscale.ll @@ -84,10 +84,7 @@ ; CHECK-LABEL: @load_clobber_load_gep3( ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 1, i64 0 ; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr , ptr [[P]], i64 1, i64 0 -; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4 -; CHECK-NEXT: [[CAST:%.*]] = bitcast float [[LOAD2]] to i32 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD1]] ; CHECK-NEXT: ret i32 [[ADD]] ; %gep1 = getelementptr , ptr %p, i64 1, i64 0 @@ -277,8 +274,7 @@ ; CHECK-NEXT: store i32 1, ptr [[GEP2]], align 4 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[GEP1]], align 4 -; CHECK-NEXT: store i32 [[T]], ptr [[Q:%.*]], align 4 +; CHECK-NEXT: store i32 0, ptr [[Q:%.*]], align 4 ; CHECK-NEXT: ret void ; CHECK: if.else: ; CHECK-NEXT: ret void @@ -291,7 +287,7 @@ br i1 %c, label %if.else, label %if.then if.then: - %t = load i32, ptr %gep1 ; <- load could be eliminated + %t = load i32, ptr %gep1 ; <- load should be eliminated store i32 %t, ptr %q ret void @@ -319,7 +315,7 @@ br i1 %c, label %if.else, label %if.then if.then: - %t = load i32, ptr %gep1 ; <- load could be eliminated + %t = load i32, ptr %gep1 ; <- load should be eliminated store i32 %t, ptr %q ret void @@ -351,7 +347,7 @@ br i1 %c, label %if.else, label %if.then if.then: - %t = load i32, ptr %gep1 ; <- load could be eliminated + %t = load i32, ptr %gep1 ; <- load should be eliminated store i32 %t, ptr %q ret void @@ -367,8 +363,7 @@ ; CHECK-NEXT: store [[V:%.*]], ptr [[P1]], align 16 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[T:%.*]] = load , ptr [[P]], align 16 -; CHECK-NEXT: store [[T]], ptr [[Q:%.*]], align 16 +; CHECK-NEXT: store zeroinitializer, ptr [[Q:%.*]], align 16 ; CHECK-NEXT: ret void ; CHECK: if.else: ; CHECK-NEXT: ret void @@ -380,7 +375,7 @@ br i1 %c, label %if.else, label %if.then if.then: - %t = load , ptr %p ; load could be eliminated + %t = load , ptr %p ; load should be eliminated store %t, ptr %q ret void