Index: lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- lib/Analysis/ScalarEvolutionExpander.cpp
+++ lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2157,8 +2157,9 @@
   const SCEV *Step = AR->getStepRecurrence(SE);
   const SCEV *Start = AR->getStart();
 
+  Type *ARTy = AR->getType();
   unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
-  unsigned DstBits = SE.getTypeSizeInBits(AR->getType());
+  unsigned DstBits = SE.getTypeSizeInBits(ARTy);
 
   // The expression {Start,+,Step} has nusw/nssw if
   //   Step < 0, Start - |Step| * Backedge <= Start
@@ -2170,11 +2171,12 @@
   Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
 
   IntegerType *Ty =
-      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType()));
+      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
+  Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
 
   Value *StepValue = expandCodeFor(Step, Ty, Loc);
   Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
-  Value *StartValue = expandCodeFor(Start, Ty, Loc);
+  Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
 
   ConstantInt *Zero =
       ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2197,8 +2199,21 @@
   // Compute:
   //   Start + |Step| * Backedge < Start
   //   Start - |Step| * Backedge > Start
-  Value *Add = Builder.CreateAdd(StartValue, MulV);
-  Value *Sub = Builder.CreateSub(StartValue, MulV);
+  Value *Add = nullptr, *Sub = nullptr;
+  if (ARExpandTy->isPointerTy()) {
+    PointerType *ARPtrTy = cast<PointerType>(ARExpandTy);
+    const SCEV *MulS = SE.getSCEV(MulV);
+    const SCEV *const StepArray[2] = {MulS, SE.getNegativeSCEV(MulS)};
+    Add = Builder.CreateBitCast(
+        expandAddToGEP(&StepArray[0], &StepArray[1], ARPtrTy, Ty, StartValue),
+        ARPtrTy);
+    Sub = Builder.CreateBitCast(
+        expandAddToGEP(&StepArray[1], &StepArray[2], ARPtrTy, Ty, StartValue),
+        ARPtrTy);
+  } else {
+    Add = Builder.CreateAdd(StartValue, MulV);
+    Sub = Builder.CreateSub(StartValue, MulV);
+  }
 
   Value *EndCompareGT = Builder.CreateICmp(
       Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
Index: test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
===================================================================
--- /dev/null
+++ test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
@@ -0,0 +1,73 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV
+
+; NB: addrspaces 10-13 are non-integral
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+
+; This matches the test case from PR38290
+; Check that we expand the SCEV predicate check using GEP, rather
+; than ptrtoint.
+
+%jl_value_t = type opaque
+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
+
+declare i64 @julia_steprange_last_4949()
+
+define void @"japi1_align!_9477"(%jl_value_t addrspace(10)**) #0 {
+; LV-LABEL: L26.lver.check
+; LV: [[OFMul:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
+; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]]
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]]
+; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]]
+; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]]
+; LV-NOT: inttoptr
+; LV-NOT: ptrtoint
+top:
+  %1 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8, !nonnull !1, !dereferenceable !2, !align !3
+  %2 = load i32, i32* inttoptr (i64 12 to i32*), align 4, !tbaa !4
+  %3 = sub i32 0, %2
+  %4 = call i64 @julia_steprange_last_4949()
+  %5 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*
+  %6 = bitcast %jl_value_t addrspace(11)* %5 to %jl_value_t addrspace(10)* addrspace(11)*
+  %7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %6, align 8, !tbaa !4, !nonnull !1, !dereferenceable !9, !align !2
+  %8 = addrspacecast %jl_value_t addrspace(10)* %7 to %jl_value_t addrspace(11)*
+  %9 = bitcast %jl_value_t addrspace(11)* %8 to i32 addrspace(13)* addrspace(11)*
+  %10 = load i32 addrspace(13)*, i32 addrspace(13)* addrspace(11)* %9, align 8, !tbaa !10, !nonnull !1
+  %11 = sext i32 %3 to i64
+  br label %L26
+
+L26:                                              ; preds = %L26, %top
+  %value_phi3 = phi i64 [ 0, %top ], [ %12, %L26 ]
+  %12 = add i64 %value_phi3, -1
+  %13 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %12
+  %14 = load i32, i32 addrspace(13)* %13, align 4, !tbaa !13
+  %15 = add i64 %12, %11
+  %16 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %15
+  store i32 %14, i32 addrspace(13)* %16, align 4, !tbaa !13
+  %17 = icmp eq i64 %value_phi3, %4
+  br i1 %17, label %L45, label %L26
+
+L45:                                              ; preds = %L26
+  ret void
+}
+
+attributes #0 = { "thunk" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"Debug Info Version", i32 3}
+!1 = !{}
+!2 = !{i64 16}
+!3 = !{i64 8}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"jtbaa_mutab", !6, i64 0}
+!6 = !{!"jtbaa_value", !7, i64 0}
+!7 = !{!"jtbaa_data", !8, i64 0}
+!8 = !{!"jtbaa"}
+!9 = !{i64 40}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"jtbaa_arrayptr", !12, i64 0}
+!12 = !{!"jtbaa_array", !8, i64 0}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"jtbaa_arraybuf", !7, i64 0}
Index: test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
===================================================================
--- test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
+++ test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -58,10 +58,10 @@
 ; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
 
@@ -233,10 +233,10 @@
 ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
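
Note on the predicate being expanded: the comments in the first hunk ("Start + |Step| * Backedge < Start", "Start - |Step| * Backedge > Start") describe the wrapping check that the patch now forms with getelementptr when the AddRec has a non-integral pointer type, since such pointers must not be round-tripped through ptrtoint/inttoptr. The following is a minimal standalone C++ sketch of the unsigned case of that arithmetic, not LLVM code: the function name addRecMayWrapUnsigned is made up, the Signed and trip-count truncation paths are omitted, and __builtin_mul_overflow (a GCC/Clang builtin) stands in for the @llvm.umul.with.overflow.i64 call seen in the CHECK lines.

#include <cstdint>
#include <cstdio>

// Standalone model of the unsigned wrapping predicate for {Start,+,Step}
// over Backedge iterations. Returns true if the AddRec may wrap, i.e. the
// runtime check should fail and the unversioned loop must be used.
static bool addRecMayWrapUnsigned(uint64_t Start, int64_t Step,
                                  uint64_t Backedge) {
  // |Step|, computed without signed-negation UB for INT64_MIN.
  uint64_t AbsStep = Step < 0 ? 0 - (uint64_t)Step : (uint64_t)Step;

  // Mirrors the @llvm.umul.with.overflow.i64 call in the expanded IR.
  uint64_t Mul;
  bool MulOverflow = __builtin_mul_overflow(AbsStep, Backedge, &Mul);

  uint64_t Add = Start + Mul; // candidate end value for Step >= 0
  uint64_t Sub = Start - Mul; // candidate end value for Step < 0

  // "Start - |Step| * Backedge > Start" (resp. "Start + |Step| * Backedge
  // < Start") detects unsigned wrap; the sign of Step selects which
  // comparison applies, matching the select in the CHECK lines.
  bool EndCompareGT = Sub > Start;
  bool EndCompareLT = Add < Start;
  bool Wraps = Step < 0 ? EndCompareGT : EndCompareLT;

  return Wraps || MulOverflow;
}

int main() {
  // {100,+,4} over 10 iterations stays in range; a start near UINT64_MAX wraps.
  printf("%d\n", addRecMayWrapUnsigned(100, 4, 10));            // 0
  printf("%d\n", addRecMayWrapUnsigned(UINT64_MAX - 8, 4, 10)); // 1
}

For integral pointer and integer AddRecs the patch keeps emitting this as add/sub on the integer type; only for non-integral address spaces (10-13 in the test's datalayout) does it switch to the GEP form checked in wrapping-pointer-ni.ll.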