Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1545,6 +1545,10 @@
   /// @}
 
+  /// Check whether a target-specific equivalent of the intrinsic \p IID is
+  /// available for the given VF; returns the target intrinsic ID, or 0 if none.
+  unsigned getTargetSupportedIntrinsic(const Intrinsic::ID IID, int VF = 0) const;
+
 private:
   /// The abstract base class used to type erase specific TTI
   /// implementations.
@@ -1896,6 +1900,8 @@
                                              Align Alignment) const = 0;
   virtual VPLegalization
   getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
+  virtual unsigned getTargetSupportedIntrinsic(const Intrinsic::ID IID,
+                                               int VF = 0) const = 0;
 };
 
 template <typename T>
@@ -2565,6 +2571,11 @@
   getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
     return Impl.getVPLegalizationStrategy(PI);
   }
+
+  unsigned getTargetSupportedIntrinsic(const Intrinsic::ID IID,
+                                       int VF) const override {
+    return Impl.getTargetSupportedIntrinsic(IID, VF);
+  }
 };
 
 template <typename T>
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -853,6 +853,9 @@
         /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
         /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
   }
+  unsigned getTargetSupportedIntrinsic(const Intrinsic::ID IID, int VF) const {
+    return 0;
+  }
 
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -689,6 +689,10 @@
     return getST()->shouldPrefetchAddressSpace(AS);
   }
 
+  virtual unsigned getTargetSupportedIntrinsic(const Intrinsic::ID IID,
+                                               int VF) const {
+    return 0;
+  }
   /// @}
 
   /// \name Vector TTI Implementations
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1852,6 +1852,11 @@
                           [IntrWriteMem, IntrArgMemOnly, IntrWillReturn,
                            NoCapture<ArgIndex<1>>]>;
 
+// Test whether two pointers are free of write-after-read/write conflicts.
+def int_whilewr_test : DefaultAttrsIntrinsic<[llvm_i1_ty],
+                                             [llvm_ptr_ty, llvm_ptr_ty],
+                                             [IntrNoMem, IntrWillReturn, IntrSpeculatable]>;
+
 // Test whether a pointer is associated with a type metadata identifier.
 def int_type_test : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
                                           [IntrNoMem, IntrWillReturn, IntrSpeculatable]>;
Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -508,9 +508,12 @@
                    const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
                    SCEVExpander &Expander);
 
-Value *addDiffRuntimeChecks(
-    Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
-    function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC);
+Value *
+addDiffRuntimeChecks(Instruction *Loc, ArrayRef<PointerDiffInfo> Checks,
+                     SCEVExpander &Expander,
+                     function_ref<Value *(IRBuilderBase &, unsigned)> GetVF,
+                     unsigned IC, const TargetTransformInfo *TTI,
+                     bool Scalable = false);
 
 /// Struct to hold information about a partially invariant condition.
 struct IVConditionInfo {
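The generic form introduced in Intrinsics.td above takes two pointers and returns a single i1, so a direct use would look like the minimal sketch below; in this patch the generic intrinsic is only used as a key for the TTI query and is never emitted itself. The function name @conflict_free is made up for the example:

    declare i1 @llvm.whilewr.test(ptr, ptr)

    define i1 @conflict_free(ptr %dst, ptr %src) {
      %safe = call i1 @llvm.whilewr.test(ptr %dst, ptr %src)
      ret i1 %safe
    }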
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1180,6 +1180,11 @@
   return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment);
 }
 
+unsigned TargetTransformInfo::getTargetSupportedIntrinsic(
+    const Intrinsic::ID IID, int VF) const {
+  return TTIImpl->getTargetSupportedIntrinsic(IID, VF);
+}
+
 TargetTransformInfo::Concept::~Concept() = default;
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -386,6 +386,9 @@
   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                        int64_t BaseOffset, bool HasBaseReg,
                                        int64_t Scale, unsigned AddrSpace) const;
+
+  unsigned getTargetSupportedIntrinsic(const Intrinsic::ID IID,
+                                       int VF = 0) const override;
   /// @}
 };
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3235,3 +3235,27 @@
     return AM.Scale != 0 && AM.Scale != 1;
   return -1;
 }
+
+unsigned AArch64TTIImpl::getTargetSupportedIntrinsic(const Intrinsic::ID IID,
+                                                     int VF) const {
+  switch (IID) {
+  case Intrinsic::whilewr_test:
+    if (!ST->hasSVE2())
+      return 0;
+    switch (VF) {
+    case 2:
+      return Intrinsic::aarch64_sve_whilewr_d;
+    case 4:
+      return Intrinsic::aarch64_sve_whilewr_s;
+    case 8:
+      return Intrinsic::aarch64_sve_whilewr_h;
+    case 16:
+      return Intrinsic::aarch64_sve_whilewr_b;
+    default:
+      return 0;
+    }
+  default:
+    break;
+  }
+  return 0;
+}
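With +sve2 available, the hook above maps the generic query onto the element-size-specific SVE2 WHILEWR intrinsics (VF 2, 4, 8 and 16 select the .d, .s, .h and .b forms); without SVE2 it returns 0 and callers keep the compare-based check. As a rough sketch, the overloaded declarations this resolves to would look as follows, assuming the pointer-argument mangling used by the test later in this patch (only the .h form actually appears there; the other three names are extrapolated from the same pattern):

    declare <vscale x 2 x i1>  @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr, ptr)
    declare <vscale x 4 x i1>  @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr, ptr)
    declare <vscale x 8 x i1>  @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr, ptr)
    declare <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nxv16i1.p0(ptr, ptr)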
Index: llvm/lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -35,6 +35,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
@@ -54,6 +55,11 @@
 static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
 static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
 
+// This transformation requires the target to support the SVE2 WHILEWR
+// instruction.
+static cl::opt<bool> SVEWhileWR("sve-whilewr", cl::Hidden, cl::init(true),
+                                cl::desc("Enable whilewr instruction"));
+
 bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                    MemorySSAUpdater *MSSAU,
@@ -1668,11 +1674,13 @@
 Value *llvm::addDiffRuntimeChecks(
     Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
-    function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
+    function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC,
+    const TargetTransformInfo *TTI, bool Scalable) {
 
   LLVMContext &Ctx = Loc->getContext();
   IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
                                            Loc->getModule()->getDataLayout());
+  auto &DL = Loc->getModule()->getDataLayout();
   ChkBuilder.SetInsertPoint(Loc);
   // Our instructions might fold to a constant.
   Value *MemoryRuntimeCheck = nullptr;
@@ -1691,8 +1699,34 @@
       Src = Builder.CreateFreeze(Src, Src->getName() + ".fr");
     }
     Value *Diff = ChkBuilder.CreateSub(Sink, Src);
-    Value *IsConflict =
-        ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
+    Value *IsConflict;
+
+    auto *SinkCI = dyn_cast<Instruction>(Sink);
+    auto *SrcCI = dyn_cast<Instruction>(Src);
+    Intrinsic::ID TargetIID =
+        TTI->getTargetSupportedIntrinsic(Intrinsic::whilewr_test, C.AccessSize);
+    if (SVEWhileWR && Scalable && SinkCI && SrcCI &&
+        SinkCI->getOpcode() == Instruction::PtrToInt &&
+        SrcCI->getOpcode() == Instruction::PtrToInt && TargetIID) {
+      ElementCount VF = ElementCount::get(C.AccessSize, true);
+      auto *M = ChkBuilder.GetInsertBlock()->getModule();
+      Type *BoolVecTy = VectorType::get(ChkBuilder.getInt1Ty(), VF);
+      Type *Ptr = PointerType::get(ChkBuilder.getInt32Ty(), 0);
+      Function *ActiveMaskFunc =
+          Intrinsic::getDeclaration(M, TargetIID, {BoolVecTy, Ptr});
+      Value *Pred = ChkBuilder.CreateCall(
+          ActiveMaskFunc, {SinkCI->getOperand(0), SrcCI->getOperand(0)});
+      // %vscale = call i64 @llvm.vscale.i64()
+      // %shl = shl nuw nsw i64 %vscale, 3
+      // %idx = add nuw nsw i64 %shl, -1
+      // %bit = extractelement <vscale x 8 x i1> %a, i64 %idx
+      auto *VFSize = GetVF(ChkBuilder, C.AccessSize);
+      Value *LastIdx = ChkBuilder.CreateSub(VFSize, ConstantInt::get(Ty, 1));
+      IsConflict = ChkBuilder.CreateExtractElement(Pred, LastIdx, "LastElt");
+    } else {
+      IsConflict =
+          ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
+    }
 
     if (MemoryRuntimeCheck) {
       IsConflict =
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1934,7 +1934,7 @@
               RuntimeVF = getRuntimeVF(B, B.getIntNTy(Bits), VF);
               return RuntimeVF;
             },
-            IC);
+            IC, TTI, VF.isScalable());
       } else {
         MemRuntimeCheckCond =
             addRuntimeChecks(MemCheckBlock->getTerminator(), L,
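For one pointer pair the emitted memcheck therefore changes shape roughly as below (a hand-written sketch assuming the halfword WHILEWR form and illustrative value names, not verbatim compiler output; the test that follows shows the exact autogenerated checks):

    ; before: compare the pointer difference against VF * UF * AccessSize
    %diff       = sub i64 %sink, %src
    %diff.check = icmp ult i64 %diff, %vf.uf.size

    ; after (SVE2, scalable VF): build the WHILEWR predicate from the original
    ; pointers and use its last lane as the check result
    %pred    = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr %sink.ptr, ptr %src.ptr)
    %lastidx = sub i64 %runtime.vf, 1
    %LastElt = extractelement <vscale x 8 x i1> %pred, i64 %lastidx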
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve2-runtime-check-size-based-threshold.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve2-runtime-check-size-based-threshold.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-vectorize -mattr=+sve2 -prefer-predicate-over-epilogue=scalar-epilogue -S %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Test case where the minimum profitable trip count due to runtime checks
+; exceeds VF.getKnownMinValue() * UF.
+; FIXME: The generated code incorrectly omits a umax(VF * UF, 28).
+
+define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr %src.1, ptr %src.2, i64 %n) {
+; CHECK-LABEL: @min_trip_count_due_to_runtime_checks_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_25:%.*]] = ptrtoint ptr [[SRC_2:%.*]] to i64
+; CHECK-NEXT:    [[SRC_13:%.*]] = ptrtoint ptr [[SRC_1:%.*]] to i64
+; CHECK-NEXT:    [[DST_12:%.*]] = ptrtoint ptr [[DST_1:%.*]] to i64
+; CHECK-NEXT:    [[DST_21:%.*]] = ptrtoint ptr [[DST_2:%.*]] to i64
+; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 40, i64 [[TMP1]])
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT:    [[TMP6:%.*]] = sub i64 [[DST_21]], [[DST_12]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[DST_2]], ptr [[DST_1]])
+; CHECK-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP4]], 1
+; CHECK-NEXT:    [[LASTELT:%.*]] = extractelement <vscale x 8 x i1> [[TMP7]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i64 [[DST_12]], [[SRC_13]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[DST_1]], ptr [[SRC_1]])
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 [[TMP4]], 1
+; CHECK-NEXT:    [[LASTELT4:%.*]] = extractelement <vscale x 8 x i1> [[TMP11]], i64 [[TMP12]]
+; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[LASTELT]], [[LASTELT4]]
+; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT:    [[TMP14:%.*]] = sub i64 [[DST_12]], [[SRC_25]]
+; CHECK-NEXT:    [[TMP15:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[DST_1]], ptr [[SRC_2]])
+; CHECK-NEXT:    [[TMP16:%.*]] = sub i64 [[TMP4]], 1
+; CHECK-NEXT:    [[LASTELT6:%.*]] = extractelement <vscale x 8 x i1> [[TMP15]], i64 [[TMP16]]
+; CHECK-NEXT:    [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX]], [[LASTELT6]]
+; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT:    [[TMP18:%.*]] = sub i64 [[DST_21]], [[SRC_13]]
+; CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[DST_2]], ptr [[SRC_1]])
+; CHECK-NEXT:    [[TMP20:%.*]] = sub i64 [[TMP4]], 1
+; CHECK-NEXT:    [[LASTELT8:%.*]] = extractelement <vscale x 8 x i1> [[TMP19]], i64 [[TMP20]]
+; CHECK-NEXT:    [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX7]], [[LASTELT8]]
+; CHECK-NEXT:    [[TMP21:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT:    [[TMP22:%.*]] = sub i64 [[DST_21]], [[SRC_25]]
+; CHECK-NEXT:    [[TMP23:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[DST_2]], ptr [[SRC_2]])
+; CHECK-NEXT:    [[TMP24:%.*]] = sub i64 [[TMP4]], 1
+; CHECK-NEXT:    [[LASTELT10:%.*]] = extractelement <vscale x 8 x i1> [[TMP23]], i64 [[TMP24]]
+; CHECK-NEXT:    [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX9]], [[LASTELT10]]
+; CHECK-NEXT:    br i1 [[CONFLICT_RDX11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP26:%.*]] = mul i64 [[TMP25]], 4
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP26]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP29:%.*]] = mul i64 [[TMP28]], 2
+; CHECK-NEXT:    [[TMP30:%.*]] = add i64 [[TMP29]], 0
+; CHECK-NEXT:    [[TMP31:%.*]] = mul i64 [[TMP30]], 1
+; CHECK-NEXT:    [[TMP32:%.*]] = add i64 [[INDEX]], [[TMP31]]
+; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[TMP27]]
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[TMP32]]
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP27]]
+; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[TMP32]]
+; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr i64, ptr [[TMP33]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP37]], align 4
+; CHECK-NEXT:    [[TMP38:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP39:%.*]] = mul i32 [[TMP38]], 2
+; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr i64, ptr [[TMP33]], i32 [[TMP39]]
+; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <vscale x 2 x i64>, ptr [[TMP40]], align 4
+; CHECK-NEXT:    [[TMP41:%.*]] = getelementptr i64, ptr [[TMP35]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD13:%.*]] = load <vscale x 2 x i64>, ptr [[TMP41]], align 4
+; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP43:%.*]] = mul i32 [[TMP42]], 2
+; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr i64, ptr [[TMP35]], i32 [[TMP43]]
+; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <vscale x 2 x i64>, ptr [[TMP44]], align 4
+; CHECK-NEXT:    [[TMP45:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[WIDE_LOAD13]]
+; CHECK-NEXT:    [[TMP46:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD12]], [[WIDE_LOAD14]]
+; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[TMP27]]
+; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[TMP32]]
+; CHECK-NEXT:    [[TMP49:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP27]]
+; CHECK-NEXT:    [[TMP50:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[TMP32]]
+; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr i64, ptr [[TMP47]], i32 0
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP45]], ptr [[TMP51]], align 4
+; CHECK-NEXT:    [[TMP52:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP53:%.*]] = mul i32 [[TMP52]], 2
+; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr i64, ptr [[TMP47]], i32 [[TMP53]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP46]], ptr [[TMP54]], align 4
+; CHECK-NEXT:    [[TMP55:%.*]] = getelementptr i64, ptr [[TMP49]], i32 0
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP45]], ptr [[TMP55]], align 4
+; CHECK-NEXT:    [[TMP56:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP57:%.*]] = mul i32 [[TMP56]], 2
+; CHECK-NEXT:    [[TMP58:%.*]] = getelementptr i64, ptr [[TMP49]], i32 [[TMP57]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP46]], ptr [[TMP58]], align 4
+; CHECK-NEXT:    [[TMP59:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP60:%.*]] = mul i64 [[TMP59]], 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP60]]
+; CHECK-NEXT:    [[TMP61:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[IV]]
+; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[IV]]
+; CHECK-NEXT:    [[L_1:%.*]] = load i64, ptr [[GEP_SRC_1]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i64, ptr [[GEP_SRC_2]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[IV]]
+; CHECK-NEXT:    [[GEP_DST_2:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[IV]]
+; CHECK-NEXT:    store i64 [[ADD]], ptr [[GEP_DST_1]], align 4
+; CHECK-NEXT:    store i64 [[ADD]], ptr [[GEP_DST_2]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP10:%.*]] = icmp ult i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP10]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.src.1 = getelementptr i64, ptr %src.1, i64 %iv
+  %gep.src.2 = getelementptr i64, ptr %src.2, i64 %iv
+  %l.1 = load i64, ptr %gep.src.1
+  %l.2 = load i64, ptr %gep.src.2
+  %add = add i64 %l.1, %l.2
+  %gep.dst.1 = getelementptr i64, ptr %dst.1, i64 %iv
+  %gep.dst.2 = getelementptr i64, ptr %dst.2, i64 %iv
+  store i64 %add, ptr %gep.dst.1
+  store i64 %add, ptr %gep.dst.2
+  %iv.next = add nsw i64 %iv, 1
+  %cmp10 = icmp ult i64 %iv.next, %n
+  br i1 %cmp10, label %loop, label %exit
+
+exit:
+  ret void
+}
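Because the new code path is guarded by the -sve-whilewr option added in LoopUtils.cpp (default on), a second RUN line along the lines below could exercise the original difference-based checks as well; this is only a suggestion, and the NOWHILEWR prefix is hypothetical rather than part of the patch:

    ; RUN: opt -passes=loop-vectorize -mattr=+sve2 -sve-whilewr=false -prefer-predicate-over-epilogue=scalar-epilogue -S %s | FileCheck %s --check-prefix=NOWHILEWR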