Index: llvm/lib/Analysis/DependenceAnalysis.cpp =================================================================== --- llvm/lib/Analysis/DependenceAnalysis.cpp +++ llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3349,12 +3349,8 @@ return false; } - Value *SrcBasePtr = SrcGEP->getOperand(0); - Value *DstBasePtr = DstGEP->getOperand(0); - while (auto *PCast = dyn_cast(SrcBasePtr)) - SrcBasePtr = PCast->getOperand(0); - while (auto *PCast = dyn_cast(DstBasePtr)) - DstBasePtr = PCast->getOperand(0); + Value *SrcBasePtr = SrcGEP->getOperand(0)->stripPointerCasts(); + Value *DstBasePtr = DstGEP->getOperand(0)->stripPointerCasts(); // Check that for identical base pointers we do not miss index offsets // that have been added before this GEP is applied. Index: llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll =================================================================== --- llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll +++ llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll @@ -15,7 +15,8 @@ ;; for (int j = 2; j < M; ++j) ;; a[i][j] = a[i+1][j-2]; ;; } - +;; +;; Note that there is a getelementptr with index 0, make sure we can analyze this case. define void @t1([2048 x i32]* %a) { entry: br label %for.body @@ -30,7 +31,8 @@ %1 = add nsw i64 %indvars.iv, -2 %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1 %2 = load i32, i32* %arrayidx6, align 4 - %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %indvars.iv4, i64 %indvars.iv + %a_gep = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 0 + %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a_gep, i64 %indvars.iv4, i64 %indvars.iv store i32 %2, i32* %arrayidx10, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp ne i64 %indvars.iv.next, 2048 @@ -45,8 +47,45 @@ ret void } - ; CHECK-LABEL: t2 +; CHECK: da analyze - consistent anti [1 -2]! + +;; Similar to @t1 but includes a call with a "returned" arg, make sure we can analyze +;; this case. + +define void @t2([2048 x i32]* %a) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc11 + %indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next5, %for.inc11 ] + br label %for.body4 + +for.body4: ; preds = %for.body, %for.body4 + %indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body4 ] + %0 = add nuw nsw i64 %indvars.iv4, 1 + %1 = add nsw i64 %indvars.iv, -2 + %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1 + %2 = load i32, i32* %arrayidx6, align 4 + %call = call [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %a) + %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %call, i64 %indvars.iv4, i64 %indvars.iv + store i32 %2, i32* %arrayidx10, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 2048 + br i1 %exitcond, label %for.body4, label %for.inc11 + +for.inc11: ; preds = %for.body4 + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 + %exitcond7 = icmp ne i64 %indvars.iv.next5, 1023 + br i1 %exitcond7, label %for.body, label %for.end13 + +for.end13: ; preds = %for.inc11 + ret void +} + +declare [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %arg) + +; CHECK-LABEL: t3 ; CHECK: da analyze - none! ; CHECK: da analyze - consistent anti [1 -2 0 -3 2]! ; CHECK: da analyze - none! @@ -62,7 +101,7 @@ ;; a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2]; ;; } -define void @t2([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) { +define void @t3([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) { entry: br label %for.body