Index: llvm/trunk/lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- llvm/trunk/lib/Analysis/BasicAliasAnalysis.cpp +++ llvm/trunk/lib/Analysis/BasicAliasAnalysis.cpp @@ -834,8 +834,42 @@ // Try to determine whether GEP1 and GEP2 index through arrays, into structs, // such that the struct field accesses provably cannot alias. // We also need at least two indices (the pointer, and the struct field). - if (GEP1->getNumIndices() != GEP2->getNumIndices() || - GEP1->getNumIndices() < 2) + if (GEP1->getNumIndices() < 2) + return MayAlias; + + // If both GEP1 and GEP2 have the inbounds keyword but index different fields + // of the same struct, they do not alias. + if (GEP1->isInBounds() && GEP2->isInBounds()) { + auto Opi1 = GEP1->op_begin() + 1; + auto Opi2 = GEP2->op_begin() + 1; + auto Ope1 = GEP1->op_end(); + auto Ope2 = GEP2->op_end(); + + SmallVector IntermediateIndices; + ConstantInt *C1 = nullptr; + ConstantInt *C2 = nullptr; + while (Opi1 != Ope1 && Opi2 != Ope2 && + (C1 = dyn_cast(*Opi1)) && + (C2 = dyn_cast(*Opi2))) { + if (C1 == C2) { + IntermediateIndices.push_back(C1); + ++Opi1; + ++Opi2; + } else { + // Both GEPs share the same pointer operand and access through the same + // indices up to this point, but now they are having different index + // values. At this point, if the indexed type is a StructType, this means + // that two GEPs are for two different fields in the same structure. + auto *Ty = GetElementPtrInst::getIndexedType( + GEP1->getSourceElementType(), IntermediateIndices); + if (isa(Ty)) + return NoAlias; + break; + } + } + } + + if (GEP1->getNumIndices() != GEP2->getNumIndices()) return MayAlias; // If we don't know the size of the accesses through both GEPs, we can't Index: llvm/trunk/test/Analysis/BasicAA/noalias-structure.ll =================================================================== --- llvm/trunk/test/Analysis/BasicAA/noalias-structure.ll +++ llvm/trunk/test/Analysis/BasicAA/noalias-structure.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.Type = type { [10 x i32], i32 } +@Foo = external global %struct.Type, align 4 + +; /// Check that BasicAA claims no alias between different fileds of a structure +; void test() { +; for (unsigned i = 0 ; i < 10 ; i++) +; Foo.arr[i] += Foo.i; +; } + +define void @test() { +; CHECK-LABEL: Function: test: +entry: + %0 = load i32, i32* getelementptr inbounds (%struct.Type, %struct.Type* @Foo, i64 0, i32 1), align 4 + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + + %arrayidx = getelementptr inbounds %struct.Type, %struct.Type* @Foo, i64 0, i32 0, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %1, %0 + store i32 %add, i32* %arrayidx, align 4 +; CHECK: NoAlias: i32* %arrayidx, i32* getelementptr inbounds (%struct.Type, %struct.Type* @Foo, i64 0, i32 1) + + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 10 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} Index: llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll +++ llvm/trunk/test/Transforms/LoopVectorize/global_alias.ll @@ -765,22 +765,18 @@ ret i32 %7 } - -;; === Now, the tests that we could vectorize with induction changes or run-time checks === - - -; /// Different objects, swapped induction, alias at the end -; int mayAlias01 (int a) { +; /// Different objects, swapped induction +; int noAlias15(int a) { ; int i; ; for (i=0; i +; CHECK-LABEL: define i32 @noAlias15( +; CHECK: add nsw <4 x i32> ; CHECK: ret -define i32 @mayAlias01(i32 %a) nounwind { +define i32 @noAlias15(i32 %a) nounwind { entry: %a.addr = alloca i32, align 4 %i = alloca i32, align 4 @@ -819,18 +815,18 @@ ret i32 %7 } -; /// Different objects, swapped induction, alias at the beginning -; int mayAlias02 (int a) { +; /// Different objects, swapped induction +; int noAlias16 (int a) { ; int i; ; for (i=0; i +; CHECK-LABEL: define i32 @noAlias16( +; CHECK: add nsw <4 x i32> ; CHECK: ret -define i32 @mayAlias02(i32 %a) nounwind { +define i32 @noAlias16(i32 %a) nounwind { entry: %a.addr = alloca i32, align 4 %i = alloca i32, align 4 @@ -869,75 +865,21 @@ ret i32 %7 } -; /// Pointer access, run-time check added -; int mayAlias03 (int a) { -; int i; -; for (i=0; i -; CHECK: ret - -define i32 @mayAlias03(i32 %a) nounwind { -entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32*, i32** @PB, align 4 - %add.ptr = getelementptr inbounds i32, i32* %1, i32 100 - %2 = load i32, i32* %i, align 4 - %idx.neg = sub i32 0, %2 - %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %idx.neg - %add.ptr2 = getelementptr inbounds i32, i32* %add.ptr1, i32 -1 - %3 = load i32, i32* %add.ptr2, align 4 - %4 = load i32, i32* %a.addr, align 4 - %add = add nsw i32 %3, %4 - %5 = load i32*, i32** @PA, align 4 - %6 = load i32, i32* %i, align 4 - %add.ptr3 = getelementptr inbounds i32, i32* %5, i32 %6 - store i32 %add, i32* %add.ptr3, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %7 = load i32, i32* %i, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %8 = load i32*, i32** @PA, align 4 - %9 = load i32, i32* %a.addr, align 4 - %add.ptr4 = getelementptr inbounds i32, i32* %8, i32 %9 - %10 = load i32, i32* %add.ptr4, align 4 - ret i32 %10 -} - -;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour at all) === +;; === Ignore undefined behavior === -; int mustAlias01 (int a) { +; int noAlias17(int a) { ; int i; ; for (i=0; i +; CHECK-LABEL: define i32 @noAlias17( +; CHECK: add nsw <4 x i32> ; CHECK: ret -define i32 @mustAlias01(i32 %a) nounwind { +define i32 @noAlias17(i32 %a) nounwind { entry: %a.addr = alloca i32, align 4 %i = alloca i32, align 4 @@ -977,17 +919,17 @@ ret i32 %7 } -; int mustAlias02 (int a) { +; int noAlias18(int a) { ; int i; ; for (i=0; i +; CHECK-LABEL: define i32 @noAlias18( +; CHECK: add nsw <4 x i32> ; CHECK: ret -define i32 @mustAlias02(i32 %a) nounwind { +define i32 @noAlias18(i32 %a) nounwind { entry: %a.addr = alloca i32, align 4 %i = alloca i32, align 4 @@ -1026,17 +968,17 @@ ret i32 %7 } -; int mustAlias03 (int a) { +; int noAlias19(int a) { ; int i; ; for (i=0; i +; CHECK-LABEL: define i32 @noAlias19( +; CHECK: add nsw <4 x i32> ; CHECK: ret -define i32 @mustAlias03(i32 %a) nounwind { +define i32 @noAlias19(i32 %a) nounwind { entry: %a.addr = alloca i32, align 4 %i = alloca i32, align 4 @@ -1075,3 +1017,57 @@ %7 = load i32, i32* %arrayidx4, align 4 ret i32 %7 } + +; /// Pointer access, run-time check added +; int mayAlias01 (int a) { +; int i; +; for (i=0; i +; CHECK: ret + +define i32 @mayAlias01(i32 %a) nounwind { +entry: + %a.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 100 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32*, i32** @PB, align 4 + %add.ptr = getelementptr inbounds i32, i32* %1, i32 100 + %2 = load i32, i32* %i, align 4 + %idx.neg = sub i32 0, %2 + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %idx.neg + %add.ptr2 = getelementptr inbounds i32, i32* %add.ptr1, i32 -1 + %3 = load i32, i32* %add.ptr2, align 4 + %4 = load i32, i32* %a.addr, align 4 + %add = add nsw i32 %3, %4 + %5 = load i32*, i32** @PA, align 4 + %6 = load i32, i32* %i, align 4 + %add.ptr3 = getelementptr inbounds i32, i32* %5, i32 %6 + store i32 %add, i32* %add.ptr3, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %7 = load i32, i32* %i, align 4 + %inc = add nsw i32 %7, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %8 = load i32*, i32** @PA, align 4 + %9 = load i32, i32* %a.addr, align 4 + %add.ptr4 = getelementptr inbounds i32, i32* %8, i32 %9 + %10 = load i32, i32* %add.ptr4, align 4 + ret i32 %10 +}