diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5191,6 +5191,9 @@ Depth](ArrayRef<Value *> VL) { if (!S.getOpcode() || !S.isAltShuffle() || VL.size() > 2) return false; + if (S.getOpcode() == Instruction::GetElementPtr && + !TTI->prefersVectorizedAddressing()) + return true; if (VectorizableTree.size() < MinTreeSize) return false; if (Depth >= RecursionMaxDepth - 1) @@ -11873,21 +11876,23 @@ if (!isValidElementType(SI->getValueOperand()->getType())) continue; Stores[getUnderlyingObject(SI->getPointerOperand())].push_back(SI); + continue; } // Ignore getelementptr instructions that have more than one index, a // constant index, or a pointer operand that doesn't point to a scalar // type. - else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { - auto Idx = GEP->idx_begin()->get(); - if (GEP->getNumIndices() > 1 || isa<ConstantInt>(Idx)) - continue; - if (!isValidElementType(Idx->getType())) - continue; - if (GEP->getType()->isVectorTy()) - continue; - GEPs[GEP->getPointerOperand()].push_back(GEP); - } + if (TTI->prefersVectorizedAddressing()) + if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { + auto Idx = GEP->idx_begin()->get(); + if (GEP->getNumIndices() > 1 || isa<ConstantInt>(Idx)) + continue; + if (!isValidElementType(Idx->getType())) + continue; + if (GEP->getType()->isVectorTy()) + continue; + GEPs[GEP->getPointerOperand()].push_back(GEP); + } } } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll @@ -12,18 +12,21 @@ ; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> ; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32> ; CHECK-NEXT: [[SUB0:%.*]] = sub <4 x i32> [[Z0]], [[Z1]] -; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i32> [[SUB0]] to <4 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0 -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[SUB0]], i32 0 +; CHECK-NEXT: [[S0:%.*]] = sext i32 [[E0]] to i64 +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[S0]] ; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP0]], i32 1 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP2]] +; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[SUB0]], i32 1 +; CHECK-NEXT: [[S1:%.*]] = sext i32 [[E1]] to i64 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S1]] ; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 2 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP3]] +; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[SUB0]], i32 2 +; CHECK-NEXT: [[S2:%.*]] = sext i32 [[E2]] to i64 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S2]] ; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP0]], i32 3 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP4]] +; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3 +; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64 +; CHECK-NEXT: 
[[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S3]] ; CHECK-NEXT: [[LOAD3:%.*]] = load i64, ptr [[GEP3]], align 4 ; CHECK-NEXT: call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]]) ; CHECK-NEXT: ret void @@ -58,23 +61,25 @@ ; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> ; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32> ; CHECK-NEXT: [[SUB0:%.*]] = sub <4 x i32> [[Z0]], [[Z1]] -; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i32> [[SUB0]] to <4 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[C0:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[C1:%.*]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[C2:%.*]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[C3:%.*]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP0]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[SUB0]], i32 0 +; CHECK-NEXT: [[S0:%.*]] = sext i32 [[E0]] to i64 +; CHECK-NEXT: [[A0:%.*]] = add i64 [[S0]], [[C0:%.*]] +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[A0]] ; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP7]] +; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[SUB0]], i32 1 +; CHECK-NEXT: [[S1:%.*]] = sext i32 [[E1]] to i64 +; CHECK-NEXT: [[A1:%.*]] = add i64 [[S1]], [[C1:%.*]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A1]] ; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP8]] +; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[SUB0]], i32 2 +; CHECK-NEXT: [[S2:%.*]] = sext i32 [[E2]] to i64 +; CHECK-NEXT: [[A2:%.*]] = add i64 [[S2]], [[C2:%.*]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A2]] ; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP9]] +; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3 +; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64 +; CHECK-NEXT: [[A3:%.*]] = add i64 [[S3]], [[C3:%.*]] +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A3]] ; CHECK-NEXT: [[LOAD3:%.*]] = load i64, ptr [[GEP3]], align 4 ; CHECK-NEXT: call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]]) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll @@ -36,59 +36,92 @@ ; GENERIC-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; GENERIC-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ] -; GENERIC-NEXT: 
[[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8 -; GENERIC-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2 -; GENERIC-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32> -; GENERIC-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2 -; GENERIC-NEXT: [[TMP5:%.*]] = zext <8 x i16> [[TMP4]] to <8 x i32> -; GENERIC-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP5]] -; GENERIC-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0 -; GENERIC-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP8]] -; GENERIC-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -; GENERIC-NEXT: [[CONV3:%.*]] = zext i16 [[TMP9]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 1 +; GENERIC-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_0101]], align 2 +; GENERIC-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 1 +; GENERIC-NEXT: [[TMP1:%.*]] = load i16, ptr [[B]], align 2 +; GENERIC-NEXT: [[CONV2:%.*]] = zext i16 [[TMP1]] to i64 +; GENERIC-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV2]] +; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[SUB]] +; GENERIC-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; GENERIC-NEXT: [[CONV3:%.*]] = zext i16 [[TMP2]] to i32 ; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]] -; GENERIC-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1 -; GENERIC-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP11]] -; GENERIC-NEXT: [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2 -; GENERIC-NEXT: [[CONV11:%.*]] = zext i16 [[TMP12]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 2 +; GENERIC-NEXT: [[TMP3:%.*]] = load i16, ptr [[INCDEC_PTR]], align 2 +; GENERIC-NEXT: [[CONV5:%.*]] = zext i16 [[TMP3]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 2 +; GENERIC-NEXT: [[TMP4:%.*]] = load i16, ptr [[INCDEC_PTR1]], align 2 +; GENERIC-NEXT: [[CONV7:%.*]] = zext i16 [[TMP4]] to i64 +; GENERIC-NEXT: [[SUB8:%.*]] = sub nsw i64 [[CONV5]], [[CONV7]] +; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB8]] +; GENERIC-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2 +; GENERIC-NEXT: [[CONV11:%.*]] = zext i16 [[TMP5]] to i32 ; GENERIC-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]] -; GENERIC-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2 -; GENERIC-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 -; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP14]] -; GENERIC-NEXT: [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2 -; GENERIC-NEXT: [[CONV20:%.*]] = zext i16 [[TMP15]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR13:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 3 +; GENERIC-NEXT: [[TMP6:%.*]] = load i16, ptr [[INCDEC_PTR4]], align 2 +; GENERIC-NEXT: [[CONV14:%.*]] = zext i16 [[TMP6]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR15:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 3 +; GENERIC-NEXT: [[TMP7:%.*]] = load i16, ptr [[INCDEC_PTR6]], align 2 +; GENERIC-NEXT: [[CONV16:%.*]] = zext i16 [[TMP7]] to i64 +; GENERIC-NEXT: [[SUB17:%.*]] = sub nsw i64 [[CONV14]], [[CONV16]] +; 
GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB17]] +; GENERIC-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2 +; GENERIC-NEXT: [[CONV20:%.*]] = zext i16 [[TMP8]] to i32 ; GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]] -; GENERIC-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3 -; GENERIC-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 -; GENERIC-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP17]] -; GENERIC-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2 -; GENERIC-NEXT: [[CONV29:%.*]] = zext i16 [[TMP18]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR22:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 4 +; GENERIC-NEXT: [[TMP9:%.*]] = load i16, ptr [[INCDEC_PTR13]], align 2 +; GENERIC-NEXT: [[CONV23:%.*]] = zext i16 [[TMP9]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR24:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 4 +; GENERIC-NEXT: [[TMP10:%.*]] = load i16, ptr [[INCDEC_PTR15]], align 2 +; GENERIC-NEXT: [[CONV25:%.*]] = zext i16 [[TMP10]] to i64 +; GENERIC-NEXT: [[SUB26:%.*]] = sub nsw i64 [[CONV23]], [[CONV25]] +; GENERIC-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB26]] +; GENERIC-NEXT: [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2 +; GENERIC-NEXT: [[CONV29:%.*]] = zext i16 [[TMP11]] to i32 ; GENERIC-NEXT: [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]] -; GENERIC-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4 -; GENERIC-NEXT: [[TMP20:%.*]] = sext i32 [[TMP19]] to i64 -; GENERIC-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP20]] -; GENERIC-NEXT: [[TMP21:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2 -; GENERIC-NEXT: [[CONV38:%.*]] = zext i16 [[TMP21]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR31:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 5 +; GENERIC-NEXT: [[TMP12:%.*]] = load i16, ptr [[INCDEC_PTR22]], align 2 +; GENERIC-NEXT: [[CONV32:%.*]] = zext i16 [[TMP12]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR33:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 5 +; GENERIC-NEXT: [[TMP13:%.*]] = load i16, ptr [[INCDEC_PTR24]], align 2 +; GENERIC-NEXT: [[CONV34:%.*]] = zext i16 [[TMP13]] to i64 +; GENERIC-NEXT: [[SUB35:%.*]] = sub nsw i64 [[CONV32]], [[CONV34]] +; GENERIC-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB35]] +; GENERIC-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2 +; GENERIC-NEXT: [[CONV38:%.*]] = zext i16 [[TMP14]] to i32 ; GENERIC-NEXT: [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]] -; GENERIC-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5 -; GENERIC-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -; GENERIC-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP23]] -; GENERIC-NEXT: [[TMP24:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2 -; GENERIC-NEXT: [[CONV47:%.*]] = zext i16 [[TMP24]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR40:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 6 +; GENERIC-NEXT: [[TMP15:%.*]] = load i16, ptr [[INCDEC_PTR31]], align 2 +; GENERIC-NEXT: [[CONV41:%.*]] = zext i16 [[TMP15]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR42:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 6 +; GENERIC-NEXT: [[TMP16:%.*]] = load i16, ptr [[INCDEC_PTR33]], align 2 +; GENERIC-NEXT: [[CONV43:%.*]] = zext i16 [[TMP16]] to i64 +; GENERIC-NEXT: [[SUB44:%.*]] = sub nsw i64 [[CONV41]], [[CONV43]] +; GENERIC-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB44]] 
+; GENERIC-NEXT: [[TMP17:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2 +; GENERIC-NEXT: [[CONV47:%.*]] = zext i16 [[TMP17]] to i32 ; GENERIC-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]] -; GENERIC-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6 -; GENERIC-NEXT: [[TMP26:%.*]] = sext i32 [[TMP25]] to i64 -; GENERIC-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP26]] -; GENERIC-NEXT: [[TMP27:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2 -; GENERIC-NEXT: [[CONV56:%.*]] = zext i16 [[TMP27]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR49:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 7 +; GENERIC-NEXT: [[TMP18:%.*]] = load i16, ptr [[INCDEC_PTR40]], align 2 +; GENERIC-NEXT: [[CONV50:%.*]] = zext i16 [[TMP18]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR51:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 7 +; GENERIC-NEXT: [[TMP19:%.*]] = load i16, ptr [[INCDEC_PTR42]], align 2 +; GENERIC-NEXT: [[CONV52:%.*]] = zext i16 [[TMP19]] to i64 +; GENERIC-NEXT: [[SUB53:%.*]] = sub nsw i64 [[CONV50]], [[CONV52]] +; GENERIC-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB53]] +; GENERIC-NEXT: [[TMP20:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2 +; GENERIC-NEXT: [[CONV56:%.*]] = zext i16 [[TMP20]] to i32 ; GENERIC-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]] -; GENERIC-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7 -; GENERIC-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP29]] -; GENERIC-NEXT: [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2 -; GENERIC-NEXT: [[CONV65:%.*]] = zext i16 [[TMP30]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8 +; GENERIC-NEXT: [[TMP21:%.*]] = load i16, ptr [[INCDEC_PTR49]], align 2 +; GENERIC-NEXT: [[CONV59:%.*]] = zext i16 [[TMP21]] to i64 +; GENERIC-NEXT: [[TMP22:%.*]] = load i16, ptr [[INCDEC_PTR51]], align 2 +; GENERIC-NEXT: [[CONV61:%.*]] = zext i16 [[TMP22]] to i64 +; GENERIC-NEXT: [[SUB62:%.*]] = sub nsw i64 [[CONV59]], [[CONV61]] +; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB62]] +; GENERIC-NEXT: [[TMP23:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2 +; GENERIC-NEXT: [[CONV65:%.*]] = zext i16 [[TMP23]] to i32 ; GENERIC-NEXT: [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]] ; GENERIC-NEXT: [[INC]] = add nuw nsw i32 [[I_0103]], 1 ; GENERIC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] @@ -109,59 +142,92 @@ ; KRYO-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; KRYO-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; KRYO-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ] -; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8 -; KRYO-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2 -; KRYO-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32> -; KRYO-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2 -; KRYO-NEXT: [[TMP5:%.*]] = zext <8 x i16> [[TMP4]] to <8 x i32> -; KRYO-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP5]] -; KRYO-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0 -; KRYO-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; KRYO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP8]] -; KRYO-NEXT: [[TMP9:%.*]] = load 
i16, ptr [[ARRAYIDX]], align 2 -; KRYO-NEXT: [[CONV3:%.*]] = zext i16 [[TMP9]] to i32 +; KRYO-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 1 +; KRYO-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_0101]], align 2 +; KRYO-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i64 +; KRYO-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 1 +; KRYO-NEXT: [[TMP1:%.*]] = load i16, ptr [[B]], align 2 +; KRYO-NEXT: [[CONV2:%.*]] = zext i16 [[TMP1]] to i64 +; KRYO-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV2]] +; KRYO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[SUB]] +; KRYO-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; KRYO-NEXT: [[CONV3:%.*]] = zext i16 [[TMP2]] to i32 ; KRYO-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]] -; KRYO-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1 -; KRYO-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; KRYO-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP11]] -; KRYO-NEXT: [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2 -; KRYO-NEXT: [[CONV11:%.*]] = zext i16 [[TMP12]] to i32 +; KRYO-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 2 +; KRYO-NEXT: [[TMP3:%.*]] = load i16, ptr [[INCDEC_PTR]], align 2 +; KRYO-NEXT: [[CONV5:%.*]] = zext i16 [[TMP3]] to i64 +; KRYO-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 2 +; KRYO-NEXT: [[TMP4:%.*]] = load i16, ptr [[INCDEC_PTR1]], align 2 +; KRYO-NEXT: [[CONV7:%.*]] = zext i16 [[TMP4]] to i64 +; KRYO-NEXT: [[SUB8:%.*]] = sub nsw i64 [[CONV5]], [[CONV7]] +; KRYO-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB8]] +; KRYO-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2 +; KRYO-NEXT: [[CONV11:%.*]] = zext i16 [[TMP5]] to i32 ; KRYO-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]] -; KRYO-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2 -; KRYO-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 -; KRYO-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP14]] -; KRYO-NEXT: [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2 -; KRYO-NEXT: [[CONV20:%.*]] = zext i16 [[TMP15]] to i32 +; KRYO-NEXT: [[INCDEC_PTR13:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 3 +; KRYO-NEXT: [[TMP6:%.*]] = load i16, ptr [[INCDEC_PTR4]], align 2 +; KRYO-NEXT: [[CONV14:%.*]] = zext i16 [[TMP6]] to i64 +; KRYO-NEXT: [[INCDEC_PTR15:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 3 +; KRYO-NEXT: [[TMP7:%.*]] = load i16, ptr [[INCDEC_PTR6]], align 2 +; KRYO-NEXT: [[CONV16:%.*]] = zext i16 [[TMP7]] to i64 +; KRYO-NEXT: [[SUB17:%.*]] = sub nsw i64 [[CONV14]], [[CONV16]] +; KRYO-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB17]] +; KRYO-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2 +; KRYO-NEXT: [[CONV20:%.*]] = zext i16 [[TMP8]] to i32 ; KRYO-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]] -; KRYO-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3 -; KRYO-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 -; KRYO-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP17]] -; KRYO-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2 -; KRYO-NEXT: [[CONV29:%.*]] = zext i16 [[TMP18]] to i32 +; KRYO-NEXT: [[INCDEC_PTR22:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 4 +; KRYO-NEXT: [[TMP9:%.*]] = load i16, ptr [[INCDEC_PTR13]], align 2 +; KRYO-NEXT: [[CONV23:%.*]] = zext 
i16 [[TMP9]] to i64 +; KRYO-NEXT: [[INCDEC_PTR24:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 4 +; KRYO-NEXT: [[TMP10:%.*]] = load i16, ptr [[INCDEC_PTR15]], align 2 +; KRYO-NEXT: [[CONV25:%.*]] = zext i16 [[TMP10]] to i64 +; KRYO-NEXT: [[SUB26:%.*]] = sub nsw i64 [[CONV23]], [[CONV25]] +; KRYO-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB26]] +; KRYO-NEXT: [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2 +; KRYO-NEXT: [[CONV29:%.*]] = zext i16 [[TMP11]] to i32 ; KRYO-NEXT: [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]] -; KRYO-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4 -; KRYO-NEXT: [[TMP20:%.*]] = sext i32 [[TMP19]] to i64 -; KRYO-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP20]] -; KRYO-NEXT: [[TMP21:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2 -; KRYO-NEXT: [[CONV38:%.*]] = zext i16 [[TMP21]] to i32 +; KRYO-NEXT: [[INCDEC_PTR31:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 5 +; KRYO-NEXT: [[TMP12:%.*]] = load i16, ptr [[INCDEC_PTR22]], align 2 +; KRYO-NEXT: [[CONV32:%.*]] = zext i16 [[TMP12]] to i64 +; KRYO-NEXT: [[INCDEC_PTR33:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 5 +; KRYO-NEXT: [[TMP13:%.*]] = load i16, ptr [[INCDEC_PTR24]], align 2 +; KRYO-NEXT: [[CONV34:%.*]] = zext i16 [[TMP13]] to i64 +; KRYO-NEXT: [[SUB35:%.*]] = sub nsw i64 [[CONV32]], [[CONV34]] +; KRYO-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB35]] +; KRYO-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2 +; KRYO-NEXT: [[CONV38:%.*]] = zext i16 [[TMP14]] to i32 ; KRYO-NEXT: [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]] -; KRYO-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5 -; KRYO-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -; KRYO-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP23]] -; KRYO-NEXT: [[TMP24:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2 -; KRYO-NEXT: [[CONV47:%.*]] = zext i16 [[TMP24]] to i32 +; KRYO-NEXT: [[INCDEC_PTR40:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 6 +; KRYO-NEXT: [[TMP15:%.*]] = load i16, ptr [[INCDEC_PTR31]], align 2 +; KRYO-NEXT: [[CONV41:%.*]] = zext i16 [[TMP15]] to i64 +; KRYO-NEXT: [[INCDEC_PTR42:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 6 +; KRYO-NEXT: [[TMP16:%.*]] = load i16, ptr [[INCDEC_PTR33]], align 2 +; KRYO-NEXT: [[CONV43:%.*]] = zext i16 [[TMP16]] to i64 +; KRYO-NEXT: [[SUB44:%.*]] = sub nsw i64 [[CONV41]], [[CONV43]] +; KRYO-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB44]] +; KRYO-NEXT: [[TMP17:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2 +; KRYO-NEXT: [[CONV47:%.*]] = zext i16 [[TMP17]] to i32 ; KRYO-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]] -; KRYO-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6 -; KRYO-NEXT: [[TMP26:%.*]] = sext i32 [[TMP25]] to i64 -; KRYO-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP26]] -; KRYO-NEXT: [[TMP27:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2 -; KRYO-NEXT: [[CONV56:%.*]] = zext i16 [[TMP27]] to i32 +; KRYO-NEXT: [[INCDEC_PTR49:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 7 +; KRYO-NEXT: [[TMP18:%.*]] = load i16, ptr [[INCDEC_PTR40]], align 2 +; KRYO-NEXT: [[CONV50:%.*]] = zext i16 [[TMP18]] to i64 +; KRYO-NEXT: [[INCDEC_PTR51:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 7 +; KRYO-NEXT: [[TMP19:%.*]] = load i16, ptr [[INCDEC_PTR42]], align 2 +; KRYO-NEXT: [[CONV52:%.*]] = zext i16 [[TMP19]] to 
i64 +; KRYO-NEXT: [[SUB53:%.*]] = sub nsw i64 [[CONV50]], [[CONV52]] +; KRYO-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB53]] +; KRYO-NEXT: [[TMP20:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2 +; KRYO-NEXT: [[CONV56:%.*]] = zext i16 [[TMP20]] to i32 ; KRYO-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]] -; KRYO-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7 -; KRYO-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP29]] -; KRYO-NEXT: [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2 -; KRYO-NEXT: [[CONV65:%.*]] = zext i16 [[TMP30]] to i32 +; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8 +; KRYO-NEXT: [[TMP21:%.*]] = load i16, ptr [[INCDEC_PTR49]], align 2 +; KRYO-NEXT: [[CONV59:%.*]] = zext i16 [[TMP21]] to i64 +; KRYO-NEXT: [[TMP22:%.*]] = load i16, ptr [[INCDEC_PTR51]], align 2 +; KRYO-NEXT: [[CONV61:%.*]] = zext i16 [[TMP22]] to i64 +; KRYO-NEXT: [[SUB62:%.*]] = sub nsw i64 [[CONV59]], [[CONV61]] +; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB62]] +; KRYO-NEXT: [[TMP23:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2 +; KRYO-NEXT: [[CONV65:%.*]] = zext i16 [[TMP23]] to i32 ; KRYO-NEXT: [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]] ; KRYO-NEXT: [[INC]] = add nuw nsw i32 [[I_0103]], 1 ; KRYO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] @@ -293,59 +359,92 @@ ; GENERIC-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; GENERIC-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ] -; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8 -; GENERIC-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2 -; GENERIC-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32> -; GENERIC-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2 -; GENERIC-NEXT: [[TMP5:%.*]] = zext <8 x i16> [[TMP4]] to <8 x i32> -; GENERIC-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP5]] -; GENERIC-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0 -; GENERIC-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP8]] -; GENERIC-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -; GENERIC-NEXT: [[CONV3:%.*]] = zext i16 [[TMP9]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 1 +; GENERIC-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_0101]], align 2 +; GENERIC-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 1 +; GENERIC-NEXT: [[TMP1:%.*]] = load i16, ptr [[B]], align 2 +; GENERIC-NEXT: [[CONV2:%.*]] = zext i16 [[TMP1]] to i64 +; GENERIC-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV2]] +; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[SUB]] +; GENERIC-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; GENERIC-NEXT: [[CONV3:%.*]] = zext i16 [[TMP2]] to i32 ; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]] -; GENERIC-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1 -; GENERIC-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; GENERIC-NEXT: 
[[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP11]] -; GENERIC-NEXT: [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2 -; GENERIC-NEXT: [[CONV11:%.*]] = zext i16 [[TMP12]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 2 +; GENERIC-NEXT: [[TMP3:%.*]] = load i16, ptr [[INCDEC_PTR]], align 2 +; GENERIC-NEXT: [[CONV5:%.*]] = zext i16 [[TMP3]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 2 +; GENERIC-NEXT: [[TMP4:%.*]] = load i16, ptr [[INCDEC_PTR1]], align 2 +; GENERIC-NEXT: [[CONV7:%.*]] = zext i16 [[TMP4]] to i64 +; GENERIC-NEXT: [[SUB8:%.*]] = sub nsw i64 [[CONV5]], [[CONV7]] +; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB8]] +; GENERIC-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2 +; GENERIC-NEXT: [[CONV11:%.*]] = zext i16 [[TMP5]] to i32 ; GENERIC-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]] -; GENERIC-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2 -; GENERIC-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 -; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP14]] -; GENERIC-NEXT: [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2 -; GENERIC-NEXT: [[CONV20:%.*]] = zext i16 [[TMP15]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR13:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 3 +; GENERIC-NEXT: [[TMP6:%.*]] = load i16, ptr [[INCDEC_PTR4]], align 2 +; GENERIC-NEXT: [[CONV14:%.*]] = zext i16 [[TMP6]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR15:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 3 +; GENERIC-NEXT: [[TMP7:%.*]] = load i16, ptr [[INCDEC_PTR6]], align 2 +; GENERIC-NEXT: [[CONV16:%.*]] = zext i16 [[TMP7]] to i64 +; GENERIC-NEXT: [[SUB17:%.*]] = sub nsw i64 [[CONV14]], [[CONV16]] +; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB17]] +; GENERIC-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2 +; GENERIC-NEXT: [[CONV20:%.*]] = zext i16 [[TMP8]] to i32 ; GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]] -; GENERIC-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3 -; GENERIC-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 -; GENERIC-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP17]] -; GENERIC-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2 -; GENERIC-NEXT: [[CONV29:%.*]] = zext i16 [[TMP18]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR22:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 4 +; GENERIC-NEXT: [[TMP9:%.*]] = load i16, ptr [[INCDEC_PTR13]], align 2 +; GENERIC-NEXT: [[CONV23:%.*]] = zext i16 [[TMP9]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR24:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 4 +; GENERIC-NEXT: [[TMP10:%.*]] = load i16, ptr [[INCDEC_PTR15]], align 2 +; GENERIC-NEXT: [[CONV25:%.*]] = zext i16 [[TMP10]] to i64 +; GENERIC-NEXT: [[SUB26:%.*]] = sub nsw i64 [[CONV23]], [[CONV25]] +; GENERIC-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB26]] +; GENERIC-NEXT: [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2 +; GENERIC-NEXT: [[CONV29:%.*]] = zext i16 [[TMP11]] to i32 ; GENERIC-NEXT: [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]] -; GENERIC-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4 -; GENERIC-NEXT: [[TMP20:%.*]] = sext i32 [[TMP19]] to i64 -; GENERIC-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP20]] -; GENERIC-NEXT: [[TMP21:%.*]] = load 
i16, ptr [[ARRAYIDX37]], align 2 -; GENERIC-NEXT: [[CONV38:%.*]] = zext i16 [[TMP21]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR31:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 5 +; GENERIC-NEXT: [[TMP12:%.*]] = load i16, ptr [[INCDEC_PTR22]], align 2 +; GENERIC-NEXT: [[CONV32:%.*]] = zext i16 [[TMP12]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR33:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 5 +; GENERIC-NEXT: [[TMP13:%.*]] = load i16, ptr [[INCDEC_PTR24]], align 2 +; GENERIC-NEXT: [[CONV34:%.*]] = zext i16 [[TMP13]] to i64 +; GENERIC-NEXT: [[SUB35:%.*]] = sub nsw i64 [[CONV32]], [[CONV34]] +; GENERIC-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB35]] +; GENERIC-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2 +; GENERIC-NEXT: [[CONV38:%.*]] = zext i16 [[TMP14]] to i32 ; GENERIC-NEXT: [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]] -; GENERIC-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5 -; GENERIC-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -; GENERIC-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP23]] -; GENERIC-NEXT: [[TMP24:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2 -; GENERIC-NEXT: [[CONV47:%.*]] = zext i16 [[TMP24]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR40:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 6 +; GENERIC-NEXT: [[TMP15:%.*]] = load i16, ptr [[INCDEC_PTR31]], align 2 +; GENERIC-NEXT: [[CONV41:%.*]] = zext i16 [[TMP15]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR42:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 6 +; GENERIC-NEXT: [[TMP16:%.*]] = load i16, ptr [[INCDEC_PTR33]], align 2 +; GENERIC-NEXT: [[CONV43:%.*]] = zext i16 [[TMP16]] to i64 +; GENERIC-NEXT: [[SUB44:%.*]] = sub nsw i64 [[CONV41]], [[CONV43]] +; GENERIC-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB44]] +; GENERIC-NEXT: [[TMP17:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2 +; GENERIC-NEXT: [[CONV47:%.*]] = zext i16 [[TMP17]] to i32 ; GENERIC-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]] -; GENERIC-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6 -; GENERIC-NEXT: [[TMP26:%.*]] = sext i32 [[TMP25]] to i64 -; GENERIC-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP26]] -; GENERIC-NEXT: [[TMP27:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2 -; GENERIC-NEXT: [[CONV56:%.*]] = zext i16 [[TMP27]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR49:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 7 +; GENERIC-NEXT: [[TMP18:%.*]] = load i16, ptr [[INCDEC_PTR40]], align 2 +; GENERIC-NEXT: [[CONV50:%.*]] = zext i16 [[TMP18]] to i64 +; GENERIC-NEXT: [[INCDEC_PTR51:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 7 +; GENERIC-NEXT: [[TMP19:%.*]] = load i16, ptr [[INCDEC_PTR42]], align 2 +; GENERIC-NEXT: [[CONV52:%.*]] = zext i16 [[TMP19]] to i64 +; GENERIC-NEXT: [[SUB53:%.*]] = sub nsw i64 [[CONV50]], [[CONV52]] +; GENERIC-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB53]] +; GENERIC-NEXT: [[TMP20:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2 +; GENERIC-NEXT: [[CONV56:%.*]] = zext i16 [[TMP20]] to i32 ; GENERIC-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]] -; GENERIC-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7 -; GENERIC-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP29]] -; GENERIC-NEXT: [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2 -; GENERIC-NEXT: [[CONV65:%.*]] = zext i16 
[[TMP30]] to i32 +; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8 +; GENERIC-NEXT: [[TMP21:%.*]] = load i16, ptr [[INCDEC_PTR49]], align 2 +; GENERIC-NEXT: [[CONV59:%.*]] = zext i16 [[TMP21]] to i64 +; GENERIC-NEXT: [[TMP22:%.*]] = load i16, ptr [[INCDEC_PTR51]], align 2 +; GENERIC-NEXT: [[CONV61:%.*]] = zext i16 [[TMP22]] to i64 +; GENERIC-NEXT: [[SUB62:%.*]] = sub nsw i64 [[CONV59]], [[CONV61]] +; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB62]] +; GENERIC-NEXT: [[TMP23:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2 +; GENERIC-NEXT: [[CONV65:%.*]] = zext i16 [[TMP23]] to i32 ; GENERIC-NEXT: [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]] ; GENERIC-NEXT: [[INC]] = add nuw nsw i32 [[I_0103]], 1 ; GENERIC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] @@ -366,59 +465,92 @@ ; KRYO-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; KRYO-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; KRYO-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ] -; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8 -; KRYO-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2 -; KRYO-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32> -; KRYO-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2 -; KRYO-NEXT: [[TMP5:%.*]] = zext <8 x i16> [[TMP4]] to <8 x i32> -; KRYO-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP5]] -; KRYO-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0 -; KRYO-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; KRYO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP8]] -; KRYO-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -; KRYO-NEXT: [[CONV3:%.*]] = zext i16 [[TMP9]] to i32 +; KRYO-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 1 +; KRYO-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_0101]], align 2 +; KRYO-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i64 +; KRYO-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 1 +; KRYO-NEXT: [[TMP1:%.*]] = load i16, ptr [[B]], align 2 +; KRYO-NEXT: [[CONV2:%.*]] = zext i16 [[TMP1]] to i64 +; KRYO-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV2]] +; KRYO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[SUB]] +; KRYO-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; KRYO-NEXT: [[CONV3:%.*]] = zext i16 [[TMP2]] to i32 ; KRYO-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]] -; KRYO-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1 -; KRYO-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; KRYO-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP11]] -; KRYO-NEXT: [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2 -; KRYO-NEXT: [[CONV11:%.*]] = zext i16 [[TMP12]] to i32 +; KRYO-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 2 +; KRYO-NEXT: [[TMP3:%.*]] = load i16, ptr [[INCDEC_PTR]], align 2 +; KRYO-NEXT: [[CONV5:%.*]] = zext i16 [[TMP3]] to i64 +; KRYO-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 2 +; KRYO-NEXT: [[TMP4:%.*]] = load i16, ptr [[INCDEC_PTR1]], align 2 +; KRYO-NEXT: [[CONV7:%.*]] = zext i16 [[TMP4]] to i64 +; KRYO-NEXT: [[SUB8:%.*]] = sub nsw i64 [[CONV5]], [[CONV7]] +; KRYO-NEXT: [[ARRAYIDX10:%.*]] = getelementptr 
inbounds i16, ptr [[G]], i64 [[SUB8]] +; KRYO-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2 +; KRYO-NEXT: [[CONV11:%.*]] = zext i16 [[TMP5]] to i32 ; KRYO-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]] -; KRYO-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2 -; KRYO-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 -; KRYO-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP14]] -; KRYO-NEXT: [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2 -; KRYO-NEXT: [[CONV20:%.*]] = zext i16 [[TMP15]] to i32 +; KRYO-NEXT: [[INCDEC_PTR13:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 3 +; KRYO-NEXT: [[TMP6:%.*]] = load i16, ptr [[INCDEC_PTR4]], align 2 +; KRYO-NEXT: [[CONV14:%.*]] = zext i16 [[TMP6]] to i64 +; KRYO-NEXT: [[INCDEC_PTR15:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 3 +; KRYO-NEXT: [[TMP7:%.*]] = load i16, ptr [[INCDEC_PTR6]], align 2 +; KRYO-NEXT: [[CONV16:%.*]] = zext i16 [[TMP7]] to i64 +; KRYO-NEXT: [[SUB17:%.*]] = sub nsw i64 [[CONV14]], [[CONV16]] +; KRYO-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB17]] +; KRYO-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2 +; KRYO-NEXT: [[CONV20:%.*]] = zext i16 [[TMP8]] to i32 ; KRYO-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]] -; KRYO-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3 -; KRYO-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 -; KRYO-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP17]] -; KRYO-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2 -; KRYO-NEXT: [[CONV29:%.*]] = zext i16 [[TMP18]] to i32 +; KRYO-NEXT: [[INCDEC_PTR22:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 4 +; KRYO-NEXT: [[TMP9:%.*]] = load i16, ptr [[INCDEC_PTR13]], align 2 +; KRYO-NEXT: [[CONV23:%.*]] = zext i16 [[TMP9]] to i64 +; KRYO-NEXT: [[INCDEC_PTR24:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 4 +; KRYO-NEXT: [[TMP10:%.*]] = load i16, ptr [[INCDEC_PTR15]], align 2 +; KRYO-NEXT: [[CONV25:%.*]] = zext i16 [[TMP10]] to i64 +; KRYO-NEXT: [[SUB26:%.*]] = sub nsw i64 [[CONV23]], [[CONV25]] +; KRYO-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB26]] +; KRYO-NEXT: [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2 +; KRYO-NEXT: [[CONV29:%.*]] = zext i16 [[TMP11]] to i32 ; KRYO-NEXT: [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]] -; KRYO-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4 -; KRYO-NEXT: [[TMP20:%.*]] = sext i32 [[TMP19]] to i64 -; KRYO-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP20]] -; KRYO-NEXT: [[TMP21:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2 -; KRYO-NEXT: [[CONV38:%.*]] = zext i16 [[TMP21]] to i32 +; KRYO-NEXT: [[INCDEC_PTR31:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 5 +; KRYO-NEXT: [[TMP12:%.*]] = load i16, ptr [[INCDEC_PTR22]], align 2 +; KRYO-NEXT: [[CONV32:%.*]] = zext i16 [[TMP12]] to i64 +; KRYO-NEXT: [[INCDEC_PTR33:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 5 +; KRYO-NEXT: [[TMP13:%.*]] = load i16, ptr [[INCDEC_PTR24]], align 2 +; KRYO-NEXT: [[CONV34:%.*]] = zext i16 [[TMP13]] to i64 +; KRYO-NEXT: [[SUB35:%.*]] = sub nsw i64 [[CONV32]], [[CONV34]] +; KRYO-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB35]] +; KRYO-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2 +; KRYO-NEXT: [[CONV38:%.*]] = zext i16 [[TMP14]] to i32 ; KRYO-NEXT: [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]] -; KRYO-NEXT: 
[[TMP22:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5 -; KRYO-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -; KRYO-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP23]] -; KRYO-NEXT: [[TMP24:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2 -; KRYO-NEXT: [[CONV47:%.*]] = zext i16 [[TMP24]] to i32 +; KRYO-NEXT: [[INCDEC_PTR40:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 6 +; KRYO-NEXT: [[TMP15:%.*]] = load i16, ptr [[INCDEC_PTR31]], align 2 +; KRYO-NEXT: [[CONV41:%.*]] = zext i16 [[TMP15]] to i64 +; KRYO-NEXT: [[INCDEC_PTR42:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 6 +; KRYO-NEXT: [[TMP16:%.*]] = load i16, ptr [[INCDEC_PTR33]], align 2 +; KRYO-NEXT: [[CONV43:%.*]] = zext i16 [[TMP16]] to i64 +; KRYO-NEXT: [[SUB44:%.*]] = sub nsw i64 [[CONV41]], [[CONV43]] +; KRYO-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB44]] +; KRYO-NEXT: [[TMP17:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2 +; KRYO-NEXT: [[CONV47:%.*]] = zext i16 [[TMP17]] to i32 ; KRYO-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]] -; KRYO-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6 -; KRYO-NEXT: [[TMP26:%.*]] = sext i32 [[TMP25]] to i64 -; KRYO-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP26]] -; KRYO-NEXT: [[TMP27:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2 -; KRYO-NEXT: [[CONV56:%.*]] = zext i16 [[TMP27]] to i32 +; KRYO-NEXT: [[INCDEC_PTR49:%.*]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 7 +; KRYO-NEXT: [[TMP18:%.*]] = load i16, ptr [[INCDEC_PTR40]], align 2 +; KRYO-NEXT: [[CONV50:%.*]] = zext i16 [[TMP18]] to i64 +; KRYO-NEXT: [[INCDEC_PTR51:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 7 +; KRYO-NEXT: [[TMP19:%.*]] = load i16, ptr [[INCDEC_PTR42]], align 2 +; KRYO-NEXT: [[CONV52:%.*]] = zext i16 [[TMP19]] to i64 +; KRYO-NEXT: [[SUB53:%.*]] = sub nsw i64 [[CONV50]], [[CONV52]] +; KRYO-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB53]] +; KRYO-NEXT: [[TMP20:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2 +; KRYO-NEXT: [[CONV56:%.*]] = zext i16 [[TMP20]] to i32 ; KRYO-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]] -; KRYO-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7 -; KRYO-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP29]] -; KRYO-NEXT: [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2 -; KRYO-NEXT: [[CONV65:%.*]] = zext i16 [[TMP30]] to i32 +; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8 +; KRYO-NEXT: [[TMP21:%.*]] = load i16, ptr [[INCDEC_PTR49]], align 2 +; KRYO-NEXT: [[CONV59:%.*]] = zext i16 [[TMP21]] to i64 +; KRYO-NEXT: [[TMP22:%.*]] = load i16, ptr [[INCDEC_PTR51]], align 2 +; KRYO-NEXT: [[CONV61:%.*]] = zext i16 [[TMP22]] to i64 +; KRYO-NEXT: [[SUB62:%.*]] = sub nsw i64 [[CONV59]], [[CONV61]] +; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[SUB62]] +; KRYO-NEXT: [[TMP23:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2 +; KRYO-NEXT: [[CONV65:%.*]] = zext i16 [[TMP23]] to i32 ; KRYO-NEXT: [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]] ; KRYO-NEXT: [[INC]] = add nuw nsw i32 [[I_0103]], 1 ; KRYO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll @@ -24,25 +24,25 @@ ; ; YAML-LABEL: Function: getelementptr_4x32 -; YAML: --- !Passed +; YAML: --- !Missed ; YAML-NEXT: Pass: slp-vectorizer -; YAML-NEXT: Name: VectorizedList +; YAML-NEXT: Name: NotBeneficial ; YAML-NEXT: Function: getelementptr_4x32 ; YAML-NEXT: Args: -; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '6' -; YAML-NEXT: - String: ' and with tree size ' -; YAML-NEXT: - TreeSize: '3' +; YAML-NEXT: - String: 'List vectorization was possible but not beneficial with cost ' +; YAML-NEXT: - Cost: '-7' +; YAML-NEXT: - String: ' >= ' +; YAML-NEXT: - Treshold: '7' -; YAML: --- !Passed +; YAML: --- !Missed ; YAML-NEXT: Pass: slp-vectorizer -; YAML-NEXT: Name: VectorizedList +; YAML-NEXT: Name: NotBeneficial ; YAML-NEXT: Function: getelementptr_4x32 ; YAML-NEXT: Args: -; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '6' -; YAML-NEXT: - String: ' and with tree size ' -; YAML-NEXT: - TreeSize: '3' +; YAML-NEXT: - String: 'List vectorization was possible but not beneficial with cost ' +; YAML-NEXT: - Cost: '-7' +; YAML-NEXT: - String: ' >= ' +; YAML-NEXT: - Treshold: '7' define i32 @getelementptr_4x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @getelementptr_4x32( @@ -50,9 +50,6 @@ ; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[X:%.*]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[Z:%.*]], i64 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -63,28 +60,23 @@ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[SUM_032:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD16]], [[FOR_BODY]] ] ; CHECK-NEXT: [[T4:%.*]] = shl nuw nsw i32 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[T4]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[SUM_032]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP9]] +; CHECK-NEXT: [[T7:%.*]] = add nsw i32 [[T4]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[T7]] to i64 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP1]] ; CHECK-NEXT: [[T8:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]] -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP2]] -; CHECK-NEXT: 
[[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i64 0 -; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP12]] +; CHECK-NEXT: [[T9:%.*]] = add nsw i32 [[T4]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[T9]] to i64 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP2]] ; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 ; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i64 1 -; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP14]] +; CHECK-NEXT: [[T11:%.*]] = add nsw i32 [[T4]], [[Z:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[T11]] to i64 +; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP3]] ; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4 ; CHECK-NEXT: [[ADD16]] = add nsw i32 [[ADD11]], [[T12]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 @@ -131,15 +123,15 @@ } ; YAML-LABEL: Function: getelementptr_2x32 -; YAML: --- !Passed +; YAML: --- !Missed ; YAML-NEXT: Pass: slp-vectorizer -; YAML-NEXT: Name: VectorizedList +; YAML-NEXT: Name: NotBeneficial ; YAML-NEXT: Function: getelementptr_2x32 ; YAML-NEXT: Args: -; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '6' -; YAML-NEXT: - String: ' and with tree size ' -; YAML-NEXT: - TreeSize: '3' +; YAML-NEXT: - String: 'List vectorization was possible but not beneficial with cost ' +; YAML-NEXT: - Cost: '-7' +; YAML-NEXT: - String: ' >= ' +; YAML-NEXT: - Treshold: '7' define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @getelementptr_2x32( @@ -147,8 +139,6 @@ ; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i64 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -159,26 +149,23 @@ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[SUM_032:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD16]], [[FOR_BODY]] ] ; CHECK-NEXT: [[T4:%.*]] = shl nuw nsw i32 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[T4]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[T4]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[SUM_032]] ; CHECK-NEXT: [[T7:%.*]] = or i32 [[T4]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[T7]] to i64 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[T7]] to i64 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP1]] ; CHECK-NEXT: [[T8:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]] -; CHECK-NEXT: [[TMP4:%.*]] = 
insertelement <2 x i32> poison, i32 [[T4]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i32> [[TMP5]], [[TMP1]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP8]] +; CHECK-NEXT: [[T9:%.*]] = add nsw i32 [[T4]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[T9]] to i64 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP2]] ; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 ; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP6]], i64 1 -; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP10]] +; CHECK-NEXT: [[T11:%.*]] = add nsw i32 [[T4]], [[Z:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[T11]] to i64 +; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP3]] ; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4 ; CHECK-NEXT: [[ADD16]] = add nsw i32 [[ADD11]], [[T12]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 @@ -232,44 +219,84 @@ ; CHECK-LABEL: @test_i16_extend( ; CHECK-NEXT: [[P_0:%.*]] = load ptr, ptr @global, align 8 ; CHECK-NEXT: [[IDX_0:%.*]] = zext i32 [[IDX_I32:%.*]] to i64 +; CHECK-NEXT: [[IDX_1:%.*]] = add nuw nsw i64 [[IDX_0]], 1 +; CHECK-NEXT: [[IDX_2:%.*]] = add nuw nsw i64 [[IDX_0]], 2 +; CHECK-NEXT: [[IDX_3:%.*]] = add nuw nsw i64 [[IDX_0]], 3 +; CHECK-NEXT: [[IDX_4:%.*]] = add nuw nsw i64 [[IDX_0]], 4 +; CHECK-NEXT: [[IDX_5:%.*]] = add nuw nsw i64 [[IDX_0]], 5 +; CHECK-NEXT: [[IDX_6:%.*]] = add nuw nsw i64 [[IDX_0]], 6 +; CHECK-NEXT: [[IDX_7:%.*]] = add nuw nsw i64 [[IDX_0]], 7 ; CHECK-NEXT: [[T53:%.*]] = getelementptr inbounds i16, ptr [[P_1:%.*]], i64 [[IDX_0]] +; CHECK-NEXT: [[OP1_L:%.*]] = load i16, ptr [[T53]], align 2 +; CHECK-NEXT: [[OP1_EXT:%.*]] = zext i16 [[OP1_L]] to i64 ; CHECK-NEXT: [[T56:%.*]] = getelementptr inbounds i16, ptr [[P_2:%.*]], i64 [[IDX_0]] -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[T53]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr [[T56]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[TMP5]] to <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <8 x i32> [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; CHECK-NEXT: [[T60:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP9]] +; CHECK-NEXT: [[OP2_L:%.*]] = load i16, ptr [[T56]], align 2 +; CHECK-NEXT: [[OP2_EXT:%.*]] = zext i16 [[OP2_L]] to i64 +; CHECK-NEXT: [[SUB_1:%.*]] = sub nsw i64 [[OP1_EXT]], [[OP2_EXT]] +; CHECK-NEXT: [[T60:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[SUB_1]] ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[T60]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP7]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[T71:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP11]] +; CHECK-NEXT: [[T64:%.*]] = getelementptr inbounds i16, ptr [[P_1]], i64 [[IDX_1]] +; CHECK-NEXT: [[T65:%.*]] = load i16, ptr [[T64]], align 2 +; CHECK-NEXT: [[T66:%.*]] = zext i16 [[T65]] to i64 +; 
CHECK-NEXT: [[T67:%.*]] = getelementptr inbounds i16, ptr [[P_2]], i64 [[IDX_1]] +; CHECK-NEXT: [[T68:%.*]] = load i16, ptr [[T67]], align 2 +; CHECK-NEXT: [[T69:%.*]] = zext i16 [[T68]] to i64 +; CHECK-NEXT: [[SUB_2:%.*]] = sub nsw i64 [[T66]], [[T69]] +; CHECK-NEXT: [[T71:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[SUB_2]] ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[T71]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP7]], i64 2 -; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[T82:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP13]] +; CHECK-NEXT: [[T75:%.*]] = getelementptr inbounds i16, ptr [[P_1]], i64 [[IDX_2]] +; CHECK-NEXT: [[T76:%.*]] = load i16, ptr [[T75]], align 2 +; CHECK-NEXT: [[T77:%.*]] = zext i16 [[T76]] to i64 +; CHECK-NEXT: [[T78:%.*]] = getelementptr inbounds i16, ptr [[P_2]], i64 [[IDX_2]] +; CHECK-NEXT: [[T79:%.*]] = load i16, ptr [[T78]], align 2 +; CHECK-NEXT: [[T80:%.*]] = zext i16 [[T79]] to i64 +; CHECK-NEXT: [[SUB_3:%.*]] = sub nsw i64 [[T77]], [[T80]] +; CHECK-NEXT: [[T82:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[SUB_3]] ; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[T82]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP7]], i64 3 -; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[T93:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP15]] +; CHECK-NEXT: [[T86:%.*]] = getelementptr inbounds i16, ptr [[P_1]], i64 [[IDX_3]] +; CHECK-NEXT: [[T87:%.*]] = load i16, ptr [[T86]], align 2 +; CHECK-NEXT: [[T88:%.*]] = zext i16 [[T87]] to i64 +; CHECK-NEXT: [[T89:%.*]] = getelementptr inbounds i16, ptr [[P_2]], i64 [[IDX_3]] +; CHECK-NEXT: [[T90:%.*]] = load i16, ptr [[T89]], align 2 +; CHECK-NEXT: [[T91:%.*]] = zext i16 [[T90]] to i64 +; CHECK-NEXT: [[SUB_4:%.*]] = sub nsw i64 [[T88]], [[T91]] +; CHECK-NEXT: [[T93:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[SUB_4]] ; CHECK-NEXT: [[L_4:%.*]] = load i32, ptr [[T93]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP7]], i64 4 -; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 -; CHECK-NEXT: [[T104:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP17]] +; CHECK-NEXT: [[T97:%.*]] = getelementptr inbounds i16, ptr [[P_1]], i64 [[IDX_4]] +; CHECK-NEXT: [[T98:%.*]] = load i16, ptr [[T97]], align 2 +; CHECK-NEXT: [[T99:%.*]] = zext i16 [[T98]] to i64 +; CHECK-NEXT: [[T100:%.*]] = getelementptr inbounds i16, ptr [[P_2]], i64 [[IDX_4]] +; CHECK-NEXT: [[T101:%.*]] = load i16, ptr [[T100]], align 2 +; CHECK-NEXT: [[T102:%.*]] = zext i16 [[T101]] to i64 +; CHECK-NEXT: [[SUB_5:%.*]] = sub nsw i64 [[T99]], [[T102]] +; CHECK-NEXT: [[T104:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[SUB_5]] ; CHECK-NEXT: [[L_5:%.*]] = load i32, ptr [[T104]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP7]], i64 5 -; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64 -; CHECK-NEXT: [[T115:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP19]] +; CHECK-NEXT: [[T108:%.*]] = getelementptr inbounds i16, ptr [[P_1]], i64 [[IDX_5]] +; CHECK-NEXT: [[T109:%.*]] = load i16, ptr [[T108]], align 2 +; CHECK-NEXT: [[T110:%.*]] = zext i16 [[T109]] to i64 +; CHECK-NEXT: [[T111:%.*]] = getelementptr inbounds i16, ptr [[P_2]], i64 [[IDX_5]] +; CHECK-NEXT: [[T112:%.*]] = load i16, ptr [[T111]], align 2 +; CHECK-NEXT: [[T113:%.*]] = zext i16 [[T112]] to i64 +; CHECK-NEXT: [[SUB_6:%.*]] = sub nsw i64 [[T110]], [[T113]] +; CHECK-NEXT: [[T115:%.*]] = 
getelementptr inbounds i32, ptr [[P_0]], i64 [[SUB_6]]
 ; CHECK-NEXT: [[L_6:%.*]] = load i32, ptr [[T115]], align 4
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP7]], i64 6
-; CHECK-NEXT: [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
-; CHECK-NEXT: [[T126:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP21]]
+; CHECK-NEXT: [[T119:%.*]] = getelementptr inbounds i16, ptr [[P_1]], i64 [[IDX_6]]
+; CHECK-NEXT: [[T120:%.*]] = load i16, ptr [[T119]], align 2
+; CHECK-NEXT: [[T121:%.*]] = zext i16 [[T120]] to i64
+; CHECK-NEXT: [[T122:%.*]] = getelementptr inbounds i16, ptr [[P_2]], i64 [[IDX_6]]
+; CHECK-NEXT: [[T123:%.*]] = load i16, ptr [[T122]], align 2
+; CHECK-NEXT: [[T124:%.*]] = zext i16 [[T123]] to i64
+; CHECK-NEXT: [[SUB_7:%.*]] = sub nsw i64 [[T121]], [[T124]]
+; CHECK-NEXT: [[T126:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[SUB_7]]
 ; CHECK-NEXT: [[L_7:%.*]] = load i32, ptr [[T126]], align 4
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP7]], i64 7
-; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
-; CHECK-NEXT: [[T137:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP23]]
+; CHECK-NEXT: [[T130:%.*]] = getelementptr inbounds i16, ptr [[P_1]], i64 [[IDX_7]]
+; CHECK-NEXT: [[T131:%.*]] = load i16, ptr [[T130]], align 2
+; CHECK-NEXT: [[T132:%.*]] = zext i16 [[T131]] to i64
+; CHECK-NEXT: [[T133:%.*]] = getelementptr inbounds i16, ptr [[P_2]], i64 [[IDX_7]]
+; CHECK-NEXT: [[T134:%.*]] = load i16, ptr [[T133]], align 2
+; CHECK-NEXT: [[T135:%.*]] = zext i16 [[T134]] to i64
+; CHECK-NEXT: [[SUB_8:%.*]] = sub nsw i64 [[T132]], [[T135]]
+; CHECK-NEXT: [[T137:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[SUB_8]]
 ; CHECK-NEXT: [[L_8:%.*]] = load i32, ptr [[T137]], align 4
 ; CHECK-NEXT: call void @use(i32 [[L_1]], i32 [[L_2]], i32 [[L_3]], i32 [[L_4]], i32 [[L_5]], i32 [[L_6]], i32 [[L_7]], i32 [[L_8]])
 ; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/gep-indices.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/gep-indices.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/gep-indices.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z15 -passes=slp-vectorizer %s -S -o - \
+; RUN: | FileCheck %s
+;
+; Test that gep indices are not first vectorized and then extracted (into address registers).
+
+%StructTy = type { i8, i64, i64, i64, i64 }
+declare void @bar(ptr, ptr)
+
+define void @fun(ptr %Addr) {
+; CHECK-LABEL: @fun(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[P2472:%.*]] = getelementptr inbounds [[STRUCTTY:%.*]], ptr [[ADDR:%.*]], i64 [[INDVARS_IV]], i32 3
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P2472]], align 8
+; CHECK-NEXT: [[P3476:%.*]] = getelementptr inbounds [[STRUCTTY]], ptr [[ADDR]], i64 [[INDVARS_IV]], i32 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[P3476]], align 8
+; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[TMP0]], 32
+; CHECK-NEXT: [[IDXPROM495:%.*]] = ashr exact i64 [[SEXT]], 32
+; CHECK-NEXT: [[ARRAYIDX496:%.*]] = getelementptr inbounds [3 x float], ptr null, i64 [[IDXPROM495]]
+; CHECK-NEXT: [[SEXT4:%.*]] = shl i64 [[TMP1]], 32
+; CHECK-NEXT: [[IDXPROM499:%.*]] = ashr exact i64 [[SEXT4]], 32
+; CHECK-NEXT: [[ARRAYIDX500:%.*]] = getelementptr inbounds [3 x float], ptr null, i64 [[IDXPROM499]]
+; CHECK-NEXT: tail call void @bar(ptr noundef poison, ptr noundef [[ARRAYIDX500]])
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: br label [[FOR_COND]]
+;
+entry:
+ br label %for.cond
+
+for.cond:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.cond ], [ 0, %entry ]
+ %P2472 = getelementptr inbounds %StructTy, ptr %Addr, i64 %indvars.iv, i32 3
+ %0 = load i64, ptr %P2472, align 8
+ %P3476 = getelementptr inbounds %StructTy, ptr %Addr, i64 %indvars.iv, i32 4
+ %1 = load i64, ptr %P3476, align 8
+ %sext = shl i64 %0, 32
+ %idxprom495 = ashr exact i64 %sext, 32
+ %arrayidx496 = getelementptr inbounds [3 x float], ptr null, i64 %idxprom495
+ %sext4 = shl i64 %1, 32
+ %idxprom499 = ashr exact i64 %sext4, 32
+ %arrayidx500 = getelementptr inbounds [3 x float], ptr null, i64 %idxprom499
+ tail call void @bar(ptr noundef poison, ptr noundef %arrayidx500)
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ br label %for.cond
+}
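The new SystemZ test above relies on the target reporting that it does not benefit from vectorized addressing, which is communicated through TargetTransformInfo::prefersVectorizedAddressing(); SystemZ is expected to return false there, so GEP index math stays in scalar registers that feed address generation. A minimal sketch of that query follows, assuming the in-tree TargetTransformInfo API; the helper name shouldCollectGEPIndices is illustrative only, not code from this patch.

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Illustrative helper: gather GEP index chains for vectorization only when
// the target prefers vectorized addressing. Targets with address registers
// (e.g. SystemZ) answer false, keeping the index computations scalar.
static bool shouldCollectGEPIndices(const TargetTransformInfo &TTI) {
  return TTI.prefersVectorizedAddressing();
}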
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
@@ -134,24 +134,49 @@
 }
 define void @PR43578_prefer128(ptr %r, ptr %p, ptr %q) #0 {
-; CHECK-LABEL: @PR43578_prefer128(
-; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2
-; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 2
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = 
i32 1 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP10]] -; CHECK-NEXT: ret void +; AVX2-LABEL: @PR43578_prefer128( +; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 +; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2 +; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3 +; AVX2-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 1 +; AVX2-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 2 +; AVX2-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 3 +; AVX2-NEXT: [[X0:%.*]] = load i64, ptr [[P]], align 2 +; AVX2-NEXT: [[X1:%.*]] = load i64, ptr [[P1]], align 2 +; AVX2-NEXT: [[X2:%.*]] = load i64, ptr [[P2]], align 2 +; AVX2-NEXT: [[X3:%.*]] = load i64, ptr [[P3]], align 2 +; AVX2-NEXT: [[Y0:%.*]] = load i64, ptr [[Q]], align 2 +; AVX2-NEXT: [[Y1:%.*]] = load i64, ptr [[Q1]], align 2 +; AVX2-NEXT: [[Y2:%.*]] = load i64, ptr [[Q2]], align 2 +; AVX2-NEXT: [[Y3:%.*]] = load i64, ptr [[Q3]], align 2 +; AVX2-NEXT: [[SUB0:%.*]] = sub nsw i64 [[X0]], [[Y0]] +; AVX2-NEXT: [[SUB1:%.*]] = sub nsw i64 [[X1]], [[Y1]] +; AVX2-NEXT: [[SUB2:%.*]] = sub nsw i64 [[X2]], [[Y2]] +; AVX2-NEXT: [[SUB3:%.*]] = sub nsw i64 [[X3]], [[Y3]] +; AVX2-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[SUB0]] +; AVX2-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[SUB1]] +; AVX2-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[SUB2]] +; AVX2-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[SUB3]] +; AVX2-NEXT: ret void +; +; AVX512-LABEL: @PR43578_prefer128( +; AVX512-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2 +; AVX512-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 2 +; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P]], align 2 +; AVX512-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[Q]], align 2 +; AVX512-NEXT: [[TMP3:%.*]] = sub nsw <2 x i64> [[TMP1]], [[TMP2]] +; AVX512-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[P2]], align 2 +; AVX512-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[Q2]], align 2 +; AVX512-NEXT: [[TMP6:%.*]] = sub nsw <2 x i64> [[TMP4]], [[TMP5]] +; AVX512-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; AVX512-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP7]] +; AVX512-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; AVX512-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP8]] +; AVX512-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 +; AVX512-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP9]] +; AVX512-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 +; AVX512-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP10]] +; AVX512-NEXT: ret void ; %p1 = getelementptr inbounds i64, ptr %p, i64 1 %p2 = getelementptr inbounds i64, ptr %p, i64 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -134,24 +134,49 @@ } define void @PR43578_prefer128(ptr %r, ptr %p, ptr %q) #0 { -; CHECK-LABEL: @PR43578_prefer128( -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2 -; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 2 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = 
load <2 x i64>, ptr [[Q]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <2 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[P2]], align 2 -; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[Q2]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <2 x i64> [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP10]] -; CHECK-NEXT: ret void +; AVX2-LABEL: @PR43578_prefer128( +; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1 +; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2 +; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3 +; AVX2-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 1 +; AVX2-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 2 +; AVX2-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 3 +; AVX2-NEXT: [[X0:%.*]] = load i64, ptr [[P]], align 2 +; AVX2-NEXT: [[X1:%.*]] = load i64, ptr [[P1]], align 2 +; AVX2-NEXT: [[X2:%.*]] = load i64, ptr [[P2]], align 2 +; AVX2-NEXT: [[X3:%.*]] = load i64, ptr [[P3]], align 2 +; AVX2-NEXT: [[Y0:%.*]] = load i64, ptr [[Q]], align 2 +; AVX2-NEXT: [[Y1:%.*]] = load i64, ptr [[Q1]], align 2 +; AVX2-NEXT: [[Y2:%.*]] = load i64, ptr [[Q2]], align 2 +; AVX2-NEXT: [[Y3:%.*]] = load i64, ptr [[Q3]], align 2 +; AVX2-NEXT: [[SUB0:%.*]] = sub nsw i64 [[X0]], [[Y0]] +; AVX2-NEXT: [[SUB1:%.*]] = sub nsw i64 [[X1]], [[Y1]] +; AVX2-NEXT: [[SUB2:%.*]] = sub nsw i64 [[X2]], [[Y2]] +; AVX2-NEXT: [[SUB3:%.*]] = sub nsw i64 [[X3]], [[Y3]] +; AVX2-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[SUB0]] +; AVX2-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[SUB1]] +; AVX2-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[SUB2]] +; AVX2-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[SUB3]] +; AVX2-NEXT: ret void +; +; AVX512-LABEL: @PR43578_prefer128( +; AVX512-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2 +; AVX512-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 2 +; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P]], align 2 +; AVX512-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[Q]], align 2 +; AVX512-NEXT: [[TMP3:%.*]] = sub nsw <2 x i64> [[TMP1]], [[TMP2]] +; AVX512-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[P2]], align 2 +; AVX512-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[Q2]], align 2 +; AVX512-NEXT: [[TMP6:%.*]] = sub nsw <2 x i64> [[TMP4]], [[TMP5]] +; AVX512-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; AVX512-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP7]] +; AVX512-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; AVX512-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP8]] +; AVX512-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 +; AVX512-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP9]] +; AVX512-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> 
[[TMP6]], i32 1 +; AVX512-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP10]] +; AVX512-NEXT: ret void ; %p1 = getelementptr inbounds i64, ptr %p, i64 1 %p2 = getelementptr inbounds i64, ptr %p, i64 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -slp-threshold=-6 -passes=slp-vectorizer,instcombine -mattr=+sse2 -S | FileCheck %s --check-prefixes=CHECK,SSE -; RUN: opt < %s -slp-threshold=-6 -passes=slp-vectorizer,instcombine -mattr=+avx -S | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: opt < %s -slp-threshold=-6 -passes=slp-vectorizer,instcombine -mattr=+avx2 -S | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: opt < %s -slp-threshold=-6 -passes=slp-vectorizer,instcombine -mattr=+avx512f -S | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: opt < %s -slp-threshold=-6 -passes=slp-vectorizer,instcombine -mattr=+avx512vl -S | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: opt < %s -slp-threshold=-6 -passes=slp-vectorizer,instcombine -mattr=+sse2 -S | FileCheck %s --check-prefixes=SSE +; RUN: opt < %s -slp-threshold=-6 -passes=slp-vectorizer,instcombine -mattr=+avx -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -slp-threshold=-6 -passes=slp-vectorizer,instcombine -mattr=+avx2 -S | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -slp-threshold=-6 -passes=slp-vectorizer,instcombine -mattr=+avx512f -S | FileCheck %s --check-prefixes=AVX512F +; RUN: opt < %s -slp-threshold=-6 -passes=slp-vectorizer,instcombine -mattr=+avx512vl -S | FileCheck %s --check-prefixes=AVX512VL target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -15,21 +15,76 @@ ; zero-extend the roots back to their original sizes. 
; define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { -; CHECK-LABEL: @PR31243_zext( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 -; CHECK-NEXT: [[TMP_4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64 -; CHECK-NEXT: [[TMP_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP_6:%.*]] = load i8, ptr [[TMP_4]], align 1 -; CHECK-NEXT: [[TMP_7:%.*]] = load i8, ptr [[TMP_5]], align 1 -; CHECK-NEXT: [[TMP_8:%.*]] = add i8 [[TMP_6]], [[TMP_7]] -; CHECK-NEXT: ret i8 [[TMP_8]] +; SSE-LABEL: @PR31243_zext( +; SSE-NEXT: entry: +; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1 +; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1 +; SSE-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64 +; SSE-NEXT: [[TMP_4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]] +; SSE-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64 +; SSE-NEXT: [[TMP_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]] +; SSE-NEXT: [[TMP_6:%.*]] = load i8, ptr [[TMP_4]], align 1 +; SSE-NEXT: [[TMP_7:%.*]] = load i8, ptr [[TMP_5]], align 1 +; SSE-NEXT: [[TMP_8:%.*]] = add i8 [[TMP_6]], [[TMP_7]] +; SSE-NEXT: ret i8 [[TMP_8]] +; +; AVX-LABEL: @PR31243_zext( +; AVX-NEXT: entry: +; AVX-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1 +; AVX-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1 +; AVX-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64 +; AVX-NEXT: [[TMP_4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]] +; AVX-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64 +; AVX-NEXT: [[TMP_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]] +; AVX-NEXT: [[TMP_6:%.*]] = load i8, ptr [[TMP_4]], align 1 +; AVX-NEXT: [[TMP_7:%.*]] = load i8, ptr [[TMP_5]], align 1 +; AVX-NEXT: [[TMP_8:%.*]] = add i8 [[TMP_6]], [[TMP_7]] +; AVX-NEXT: ret i8 [[TMP_8]] +; +; AVX2-LABEL: @PR31243_zext( +; AVX2-NEXT: entry: +; AVX2-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1 +; AVX2-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1 +; AVX2-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64 +; AVX2-NEXT: [[TMP_4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]] +; AVX2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64 +; AVX2-NEXT: [[TMP_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]] +; AVX2-NEXT: [[TMP_6:%.*]] = load i8, ptr [[TMP_4]], align 1 +; AVX2-NEXT: [[TMP_7:%.*]] = load i8, ptr [[TMP_5]], align 1 +; AVX2-NEXT: [[TMP_8:%.*]] = add i8 [[TMP_6]], [[TMP_7]] +; AVX2-NEXT: ret i8 [[TMP_8]] +; +; AVX512F-LABEL: @PR31243_zext( +; AVX512F-NEXT: entry: +; AVX512F-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 +; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 +; AVX512F-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; AVX512F-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 +; AVX512F-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 +; AVX512F-NEXT: [[TMP_4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] +; AVX512F-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1 +; AVX512F-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64 +; AVX512F-NEXT: [[TMP_5:%.*]] = getelementptr 
inbounds i8, ptr [[PTR]], i64 [[TMP6]] +; AVX512F-NEXT: [[TMP_6:%.*]] = load i8, ptr [[TMP_4]], align 1 +; AVX512F-NEXT: [[TMP_7:%.*]] = load i8, ptr [[TMP_5]], align 1 +; AVX512F-NEXT: [[TMP_8:%.*]] = add i8 [[TMP_6]], [[TMP_7]] +; AVX512F-NEXT: ret i8 [[TMP_8]] +; +; AVX512VL-LABEL: @PR31243_zext( +; AVX512VL-NEXT: entry: +; AVX512VL-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 +; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 +; AVX512VL-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; AVX512VL-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 +; AVX512VL-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 +; AVX512VL-NEXT: [[TMP_4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] +; AVX512VL-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1 +; AVX512VL-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64 +; AVX512VL-NEXT: [[TMP_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]] +; AVX512VL-NEXT: [[TMP_6:%.*]] = load i8, ptr [[TMP_4]], align 1 +; AVX512VL-NEXT: [[TMP_7:%.*]] = load i8, ptr [[TMP_5]], align 1 +; AVX512VL-NEXT: [[TMP_8:%.*]] = add i8 [[TMP_6]], [[TMP_7]] +; AVX512VL-NEXT: ret i8 [[TMP_8]] ; entry: %tmp_0 = zext i8 %v0 to i32 @@ -73,21 +128,64 @@ ; ; AVX-LABEL: @PR31243_sext( ; AVX-NEXT: entry: -; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 -; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], -; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i16> -; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP3]], i64 0 -; AVX-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i64 -; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP5]] -; AVX-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP3]], i64 1 -; AVX-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i64 -; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP7]] +; AVX-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1 +; AVX-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1 +; AVX-NEXT: [[TMP2:%.*]] = sext i8 [[TMP0]] to i64 +; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]] +; AVX-NEXT: [[TMP3:%.*]] = sext i8 [[TMP1]] to i64 +; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]] ; AVX-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 ; AVX-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 ; AVX-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]] ; AVX-NEXT: ret i8 [[TMP8]] ; +; AVX2-LABEL: @PR31243_sext( +; AVX2-NEXT: entry: +; AVX2-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1 +; AVX2-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1 +; AVX2-NEXT: [[TMP2:%.*]] = sext i8 [[TMP0]] to i64 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]] +; AVX2-NEXT: [[TMP3:%.*]] = sext i8 [[TMP1]] to i64 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]] +; AVX2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; AVX2-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; AVX2-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]] +; AVX2-NEXT: ret i8 [[TMP8]] +; +; AVX512F-LABEL: @PR31243_sext( +; AVX512F-NEXT: entry: +; AVX512F-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 +; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 +; AVX512F-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; AVX512F-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x 
i16> +; AVX512F-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP3]], i64 0 +; AVX512F-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i64 +; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP5]] +; AVX512F-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP3]], i64 1 +; AVX512F-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i64 +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP7]] +; AVX512F-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; AVX512F-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; AVX512F-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]] +; AVX512F-NEXT: ret i8 [[TMP8]] +; +; AVX512VL-LABEL: @PR31243_sext( +; AVX512VL-NEXT: entry: +; AVX512VL-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 +; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 +; AVX512VL-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; AVX512VL-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i16> +; AVX512VL-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP3]], i64 0 +; AVX512VL-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i64 +; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP5]] +; AVX512VL-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP3]], i64 1 +; AVX512VL-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i64 +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP7]] +; AVX512VL-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; AVX512VL-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; AVX512VL-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]] +; AVX512VL-NEXT: ret i8 [[TMP8]] +; entry: %tmp0 = sext i8 %v0 to i32 %tmp1 = sext i8 %v1 to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll @@ -4,18 +4,29 @@ define void @test(ptr %r, ptr %p, ptr %q) #0 { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 0 +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3 ; CHECK-NEXT: [[Q0:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[P0]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr [[Q0]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <4 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP7]] +; CHECK-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 1 +; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 2 +; CHECK-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 3 +; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[P0]], align 2 +; CHECK-NEXT: [[X1:%.*]] = load 
i64, ptr [[P1]], align 2 +; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[P2]], align 2 +; CHECK-NEXT: [[X3:%.*]] = load i64, ptr [[P3]], align 2 +; CHECK-NEXT: [[Y0:%.*]] = load i64, ptr [[Q0]], align 2 +; CHECK-NEXT: [[Y1:%.*]] = load i64, ptr [[Q1]], align 2 +; CHECK-NEXT: [[Y2:%.*]] = load i64, ptr [[Q2]], align 2 +; CHECK-NEXT: [[Y3:%.*]] = load i64, ptr [[Q3]], align 2 +; CHECK-NEXT: [[SUB0:%.*]] = sub nsw i64 [[X0]], [[Y0]] +; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i64 [[X1]], [[Y1]] +; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i64 [[X2]], [[Y2]] +; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i64 [[X3]], [[Y3]] +; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[SUB0]] +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[SUB1]] +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[SUB2]] +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[SUB3]] ; CHECK-NEXT: ret void ; %p0 = getelementptr inbounds i64, ptr %p, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -13,28 +13,30 @@ ; CHECK: if.end: ; CHECK-NEXT: [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef ; CHECK-NEXT: [[SHR15:%.*]] = ashr i32 [[SUB14]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[SHR15]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i32> [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], undef -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP4]], <4 x i32> undef -; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i32> [[TMP6]] to <4 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 -; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP16]] +; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp sgt i32 [[SHR15]], 0 +; CHECK-NEXT: [[COND_I_I:%.*]] = select i1 [[CMP_I_I]], i32 [[SHR15]], i32 0 +; CHECK-NEXT: [[CMP_I4_I:%.*]] = icmp slt i32 [[COND_I_I]], undef +; CHECK-NEXT: [[COND_I5_I:%.*]] = select i1 [[CMP_I4_I]], i32 [[COND_I_I]], i32 undef +; CHECK-NEXT: [[IDXPROM30:%.*]] = sext i32 [[COND_I5_I]] to i64 +; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30]] +; CHECK-NEXT: [[CMP_I_I_1:%.*]] = icmp sgt i32 [[SUB14]], -1 +; CHECK-NEXT: 
[[COND_I_I_1:%.*]] = select i1 [[CMP_I_I_1]], i32 undef, i32 0 +; CHECK-NEXT: [[CMP_I4_I_1:%.*]] = icmp slt i32 [[COND_I_I_1]], undef +; CHECK-NEXT: [[COND_I5_I_1:%.*]] = select i1 [[CMP_I4_I_1]], i32 [[COND_I_I_1]], i32 undef +; CHECK-NEXT: [[IDXPROM30_1:%.*]] = sext i32 [[COND_I5_I_1]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_1]] +; CHECK-NEXT: [[CMP_I_I_2:%.*]] = icmp sgt i32 [[SUB14]], -5 +; CHECK-NEXT: [[COND_I_I_2:%.*]] = select i1 [[CMP_I_I_2]], i32 undef, i32 0 +; CHECK-NEXT: [[CMP_I4_I_2:%.*]] = icmp slt i32 [[COND_I_I_2]], undef +; CHECK-NEXT: [[COND_I5_I_2:%.*]] = select i1 [[CMP_I4_I_2]], i32 [[COND_I_I_2]], i32 undef +; CHECK-NEXT: [[IDXPROM30_2:%.*]] = sext i32 [[COND_I5_I_2]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_2]] +; CHECK-NEXT: [[CMP_I_I_3:%.*]] = icmp sgt i32 [[SUB14]], -9 +; CHECK-NEXT: [[COND_I_I_3:%.*]] = select i1 [[CMP_I_I_3]], i32 undef, i32 0 +; CHECK-NEXT: [[CMP_I4_I_3:%.*]] = icmp slt i32 [[COND_I_I_3]], undef +; CHECK-NEXT: [[COND_I5_I_3:%.*]] = select i1 [[CMP_I4_I_3]], i32 [[COND_I_I_3]], i32 undef +; CHECK-NEXT: [[IDXPROM30_3:%.*]] = sext i32 [[COND_I5_I_3]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_3]] ; CHECK-NEXT: unreachable ; entry: