diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -814,6 +814,21 @@
   return true;
 }
 
+/// Return true if \p V is an extractelement that yields a pointer to a vector
+/// and whose index is a ConstantInt iff \p Const is set.
+static bool isExtractElementReturningVectorPointer(const Value *V,
+                                                   bool Const = false) {
+  const auto *EEI = dyn_cast<ExtractElementInst>(V);
+  if (!EEI)
+    return false;
+  // An extractelement result has the element type of its vector operand.
+  const auto *PtrTy = dyn_cast<PointerType>(EEI->getType());
+  // NOTE(review): getPointerElementType() needs typed pointers - confirm.
+  if (!PtrTy || !isa<VectorType>(PtrTy->getPointerElementType()))
+    return false;
+  return Const == isa<ConstantInt>(EEI->getIndexOperand());
+}
+
 bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
   VectorType *VT = dyn_cast<VectorType>(EEI.getOperand(0)->getType());
   if (!VT)
@@ -825,8 +840,13 @@
   Value *ExtIdx = EEI.getOperand(1);
 
   if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
-    Value *Res = Op0[CI->getValue().getZExtValue()];
-    gather(&EEI, {Res});
+    // Consider the case where a constant-index extractelement produces a
+    // pointer to a vector. Such an extractelement cannot be scalarized, so
+    // don't bother scattering and gathering it.
+    if (!isExtractElementReturningVectorPointer(&EEI, true)) {
+      Value *Res = Op0[CI->getValue().getZExtValue()];
+      gather(&EEI, {Res});
+    }
     return true;
   }
 
@@ -908,7 +928,14 @@
 
   unsigned NumElems = cast<FixedVectorType>(Layout->VecTy)->getNumElements();
   IRBuilder<> Builder(&LI);
-  Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), LI.getType());
+  Value *Op = LI.getPointerOperand();
+  // Consider the case where a vector load consumes a vector pointer produced
+  // by a variable-index extractelement. That extractelement has already been
+  // scalarized, so the proper input for the scatterer is the first item of
+  // its scattered form.
+ if (isExtractElementReturningVectorPointer(Op)) + Op = Scattered[Op].front(); + Scatterer Ptr = scatter(&LI, Op, LI.getType()); ValueVector Res; Res.resize(NumElems); @@ -934,7 +961,14 @@ unsigned NumElems = cast(Layout->VecTy)->getNumElements(); IRBuilder<> Builder(&SI); - Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), FullValue->getType()); + Value *Op = SI.getPointerOperand(); + // Consider scenario when vector store consume vector pointer produced by + // variable extractelement. In such case we already scalarized extractelement, + // so proper input for scatterer is first item from extractelement's scattered + // form. + if (isExtractElementReturningVectorPointer(Op)) + Op = Scattered[Op].front(); + Scatterer VPtr = scatter(&SI, Op, FullValue->getType()); Scatterer VVal = scatter(&SI, FullValue); ValueVector Stores; diff --git a/llvm/test/Transforms/Scalarizer/constant-extractelement.ll b/llvm/test/Transforms/Scalarizer/constant-extractelement.ll --- a/llvm/test/Transforms/Scalarizer/constant-extractelement.ll +++ b/llvm/test/Transforms/Scalarizer/constant-extractelement.ll @@ -1,19 +1,310 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck --check-prefixes=ALL %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" ; Test that constant extracts are nicely scalarized -define i32 @f1(<4 x i32> *%src, i32 %index) { +define i32 @f1(<4 x i32> *%src) { ; ALL-LABEL: @f1( ; ALL-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32* ; ALL-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3 ; ALL-NEXT: [[VAL0_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4 -; ALL-NEXT: [[VAL2:%.*]] = shl i32 4, [[VAL0_I3]] -; ALL-NEXT: 
ret i32 [[VAL2]] +; ALL-NEXT: [[VAL1_I3:%.*]] = shl i32 4, [[VAL0_I3]] +; ALL-NEXT: ret i32 [[VAL1_I3]] ; %val0 = load <4 x i32> , <4 x i32> *%src %val1 = shl <4 x i32> , %val0 %val2 = extractelement <4 x i32> %val1, i32 3 ret i32 %val2 } + +; Test that constant extractelement returning vector pointer is properly scalarized without crash +define <64 x i32> @f2(<16 x <64 x i32>*> %src) { +; ALL-LABEL: @f2( +; ALL-NEXT: [[TMP1:%.*]] = extractelement <16 x <64 x i32>*> [[SRC:%.*]], i32 3 +; ALL-NEXT: [[DOTI0:%.*]] = bitcast <64 x i32>* [[TMP1]] to i32* +; ALL-NEXT: [[DOTI01:%.*]] = load i32, i32* [[DOTI0]], align 4 +; ALL-NEXT: [[DOTI1:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 1 +; ALL-NEXT: [[DOTI12:%.*]] = load i32, i32* [[DOTI1]], align 4 +; ALL-NEXT: [[DOTI2:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 2 +; ALL-NEXT: [[DOTI23:%.*]] = load i32, i32* [[DOTI2]], align 4 +; ALL-NEXT: [[DOTI3:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 3 +; ALL-NEXT: [[DOTI34:%.*]] = load i32, i32* [[DOTI3]], align 4 +; ALL-NEXT: [[DOTI4:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 4 +; ALL-NEXT: [[DOTI45:%.*]] = load i32, i32* [[DOTI4]], align 4 +; ALL-NEXT: [[DOTI5:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 5 +; ALL-NEXT: [[DOTI56:%.*]] = load i32, i32* [[DOTI5]], align 4 +; ALL-NEXT: [[DOTI6:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 6 +; ALL-NEXT: [[DOTI67:%.*]] = load i32, i32* [[DOTI6]], align 4 +; ALL-NEXT: [[DOTI7:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 7 +; ALL-NEXT: [[DOTI78:%.*]] = load i32, i32* [[DOTI7]], align 4 +; ALL-NEXT: [[DOTI8:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 8 +; ALL-NEXT: [[DOTI89:%.*]] = load i32, i32* [[DOTI8]], align 4 +; ALL-NEXT: [[DOTI9:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 9 +; ALL-NEXT: [[DOTI910:%.*]] = load i32, i32* [[DOTI9]], align 4 +; ALL-NEXT: [[DOTI10:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 10 +; ALL-NEXT: [[DOTI1011:%.*]] = load i32, i32* [[DOTI10]], align 4 +; ALL-NEXT: [[DOTI11:%.*]] = 
getelementptr i32, i32* [[DOTI0]], i32 11 +; ALL-NEXT: [[DOTI1112:%.*]] = load i32, i32* [[DOTI11]], align 4 +; ALL-NEXT: [[DOTI1213:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 12 +; ALL-NEXT: [[DOTI1214:%.*]] = load i32, i32* [[DOTI1213]], align 4 +; ALL-NEXT: [[DOTI13:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 13 +; ALL-NEXT: [[DOTI1315:%.*]] = load i32, i32* [[DOTI13]], align 4 +; ALL-NEXT: [[DOTI14:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 14 +; ALL-NEXT: [[DOTI1416:%.*]] = load i32, i32* [[DOTI14]], align 4 +; ALL-NEXT: [[DOTI15:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 15 +; ALL-NEXT: [[DOTI1517:%.*]] = load i32, i32* [[DOTI15]], align 4 +; ALL-NEXT: [[DOTI16:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 16 +; ALL-NEXT: [[DOTI1618:%.*]] = load i32, i32* [[DOTI16]], align 4 +; ALL-NEXT: [[DOTI17:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 17 +; ALL-NEXT: [[DOTI1719:%.*]] = load i32, i32* [[DOTI17]], align 4 +; ALL-NEXT: [[DOTI18:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 18 +; ALL-NEXT: [[DOTI1820:%.*]] = load i32, i32* [[DOTI18]], align 4 +; ALL-NEXT: [[DOTI19:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 19 +; ALL-NEXT: [[DOTI1921:%.*]] = load i32, i32* [[DOTI19]], align 4 +; ALL-NEXT: [[DOTI20:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 20 +; ALL-NEXT: [[DOTI2022:%.*]] = load i32, i32* [[DOTI20]], align 4 +; ALL-NEXT: [[DOTI21:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 21 +; ALL-NEXT: [[DOTI2123:%.*]] = load i32, i32* [[DOTI21]], align 4 +; ALL-NEXT: [[DOTI22:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 22 +; ALL-NEXT: [[DOTI2224:%.*]] = load i32, i32* [[DOTI22]], align 4 +; ALL-NEXT: [[DOTI2325:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 23 +; ALL-NEXT: [[DOTI2326:%.*]] = load i32, i32* [[DOTI2325]], align 4 +; ALL-NEXT: [[DOTI24:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 24 +; ALL-NEXT: [[DOTI2427:%.*]] = load i32, i32* [[DOTI24]], align 4 +; ALL-NEXT: [[DOTI25:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 25 +; 
ALL-NEXT: [[DOTI2528:%.*]] = load i32, i32* [[DOTI25]], align 4 +; ALL-NEXT: [[DOTI26:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 26 +; ALL-NEXT: [[DOTI2629:%.*]] = load i32, i32* [[DOTI26]], align 4 +; ALL-NEXT: [[DOTI27:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 27 +; ALL-NEXT: [[DOTI2730:%.*]] = load i32, i32* [[DOTI27]], align 4 +; ALL-NEXT: [[DOTI28:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 28 +; ALL-NEXT: [[DOTI2831:%.*]] = load i32, i32* [[DOTI28]], align 4 +; ALL-NEXT: [[DOTI29:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 29 +; ALL-NEXT: [[DOTI2932:%.*]] = load i32, i32* [[DOTI29]], align 4 +; ALL-NEXT: [[DOTI30:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 30 +; ALL-NEXT: [[DOTI3033:%.*]] = load i32, i32* [[DOTI30]], align 4 +; ALL-NEXT: [[DOTI31:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 31 +; ALL-NEXT: [[DOTI3134:%.*]] = load i32, i32* [[DOTI31]], align 4 +; ALL-NEXT: [[DOTI32:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 32 +; ALL-NEXT: [[DOTI3235:%.*]] = load i32, i32* [[DOTI32]], align 4 +; ALL-NEXT: [[DOTI33:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 33 +; ALL-NEXT: [[DOTI3336:%.*]] = load i32, i32* [[DOTI33]], align 4 +; ALL-NEXT: [[DOTI3437:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 34 +; ALL-NEXT: [[DOTI3438:%.*]] = load i32, i32* [[DOTI3437]], align 4 +; ALL-NEXT: [[DOTI35:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 35 +; ALL-NEXT: [[DOTI3539:%.*]] = load i32, i32* [[DOTI35]], align 4 +; ALL-NEXT: [[DOTI36:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 36 +; ALL-NEXT: [[DOTI3640:%.*]] = load i32, i32* [[DOTI36]], align 4 +; ALL-NEXT: [[DOTI37:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 37 +; ALL-NEXT: [[DOTI3741:%.*]] = load i32, i32* [[DOTI37]], align 4 +; ALL-NEXT: [[DOTI38:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 38 +; ALL-NEXT: [[DOTI3842:%.*]] = load i32, i32* [[DOTI38]], align 4 +; ALL-NEXT: [[DOTI39:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 39 +; ALL-NEXT: [[DOTI3943:%.*]] = load i32, i32* 
[[DOTI39]], align 4 +; ALL-NEXT: [[DOTI40:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 40 +; ALL-NEXT: [[DOTI4044:%.*]] = load i32, i32* [[DOTI40]], align 4 +; ALL-NEXT: [[DOTI41:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 41 +; ALL-NEXT: [[DOTI4145:%.*]] = load i32, i32* [[DOTI41]], align 4 +; ALL-NEXT: [[DOTI42:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 42 +; ALL-NEXT: [[DOTI4246:%.*]] = load i32, i32* [[DOTI42]], align 4 +; ALL-NEXT: [[DOTI43:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 43 +; ALL-NEXT: [[DOTI4347:%.*]] = load i32, i32* [[DOTI43]], align 4 +; ALL-NEXT: [[DOTI44:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 44 +; ALL-NEXT: [[DOTI4448:%.*]] = load i32, i32* [[DOTI44]], align 4 +; ALL-NEXT: [[DOTI4549:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 45 +; ALL-NEXT: [[DOTI4550:%.*]] = load i32, i32* [[DOTI4549]], align 4 +; ALL-NEXT: [[DOTI46:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 46 +; ALL-NEXT: [[DOTI4651:%.*]] = load i32, i32* [[DOTI46]], align 4 +; ALL-NEXT: [[DOTI47:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 47 +; ALL-NEXT: [[DOTI4752:%.*]] = load i32, i32* [[DOTI47]], align 4 +; ALL-NEXT: [[DOTI48:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 48 +; ALL-NEXT: [[DOTI4853:%.*]] = load i32, i32* [[DOTI48]], align 4 +; ALL-NEXT: [[DOTI49:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 49 +; ALL-NEXT: [[DOTI4954:%.*]] = load i32, i32* [[DOTI49]], align 4 +; ALL-NEXT: [[DOTI50:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 50 +; ALL-NEXT: [[DOTI5055:%.*]] = load i32, i32* [[DOTI50]], align 4 +; ALL-NEXT: [[DOTI51:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 51 +; ALL-NEXT: [[DOTI5156:%.*]] = load i32, i32* [[DOTI51]], align 4 +; ALL-NEXT: [[DOTI52:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 52 +; ALL-NEXT: [[DOTI5257:%.*]] = load i32, i32* [[DOTI52]], align 4 +; ALL-NEXT: [[DOTI53:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 53 +; ALL-NEXT: [[DOTI5358:%.*]] = load i32, i32* [[DOTI53]], align 4 +; ALL-NEXT: [[DOTI54:%.*]] = 
getelementptr i32, i32* [[DOTI0]], i32 54 +; ALL-NEXT: [[DOTI5459:%.*]] = load i32, i32* [[DOTI54]], align 4 +; ALL-NEXT: [[DOTI55:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 55 +; ALL-NEXT: [[DOTI5560:%.*]] = load i32, i32* [[DOTI55]], align 4 +; ALL-NEXT: [[DOTI5661:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 56 +; ALL-NEXT: [[DOTI5662:%.*]] = load i32, i32* [[DOTI5661]], align 4 +; ALL-NEXT: [[DOTI57:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 57 +; ALL-NEXT: [[DOTI5763:%.*]] = load i32, i32* [[DOTI57]], align 4 +; ALL-NEXT: [[DOTI58:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 58 +; ALL-NEXT: [[DOTI5864:%.*]] = load i32, i32* [[DOTI58]], align 4 +; ALL-NEXT: [[DOTI59:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 59 +; ALL-NEXT: [[DOTI5965:%.*]] = load i32, i32* [[DOTI59]], align 4 +; ALL-NEXT: [[DOTI60:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 60 +; ALL-NEXT: [[DOTI6066:%.*]] = load i32, i32* [[DOTI60]], align 4 +; ALL-NEXT: [[DOTI61:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 61 +; ALL-NEXT: [[DOTI6167:%.*]] = load i32, i32* [[DOTI61]], align 4 +; ALL-NEXT: [[DOTI62:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 62 +; ALL-NEXT: [[DOTI6268:%.*]] = load i32, i32* [[DOTI62]], align 4 +; ALL-NEXT: [[DOTI63:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 63 +; ALL-NEXT: [[DOTI6369:%.*]] = load i32, i32* [[DOTI63]], align 4 +; ALL-NEXT: [[DOTUPTO0:%.*]] = insertelement <64 x i32> poison, i32 [[DOTI01]], i32 0 +; ALL-NEXT: [[DOTUPTO1:%.*]] = insertelement <64 x i32> [[DOTUPTO0]], i32 [[DOTI12]], i32 1 +; ALL-NEXT: [[DOTUPTO2:%.*]] = insertelement <64 x i32> [[DOTUPTO1]], i32 [[DOTI23]], i32 2 +; ALL-NEXT: [[DOTUPTO3:%.*]] = insertelement <64 x i32> [[DOTUPTO2]], i32 [[DOTI34]], i32 3 +; ALL-NEXT: [[DOTUPTO4:%.*]] = insertelement <64 x i32> [[DOTUPTO3]], i32 [[DOTI45]], i32 4 +; ALL-NEXT: [[DOTUPTO5:%.*]] = insertelement <64 x i32> [[DOTUPTO4]], i32 [[DOTI56]], i32 5 +; ALL-NEXT: [[DOTUPTO6:%.*]] = insertelement <64 x i32> [[DOTUPTO5]], i32 
[[DOTI67]], i32 6 +; ALL-NEXT: [[DOTUPTO7:%.*]] = insertelement <64 x i32> [[DOTUPTO6]], i32 [[DOTI78]], i32 7 +; ALL-NEXT: [[DOTUPTO8:%.*]] = insertelement <64 x i32> [[DOTUPTO7]], i32 [[DOTI89]], i32 8 +; ALL-NEXT: [[DOTUPTO9:%.*]] = insertelement <64 x i32> [[DOTUPTO8]], i32 [[DOTI910]], i32 9 +; ALL-NEXT: [[DOTUPTO10:%.*]] = insertelement <64 x i32> [[DOTUPTO9]], i32 [[DOTI1011]], i32 10 +; ALL-NEXT: [[DOTUPTO11:%.*]] = insertelement <64 x i32> [[DOTUPTO10]], i32 [[DOTI1112]], i32 11 +; ALL-NEXT: [[DOTUPTO12:%.*]] = insertelement <64 x i32> [[DOTUPTO11]], i32 [[DOTI1214]], i32 12 +; ALL-NEXT: [[DOTUPTO13:%.*]] = insertelement <64 x i32> [[DOTUPTO12]], i32 [[DOTI1315]], i32 13 +; ALL-NEXT: [[DOTUPTO14:%.*]] = insertelement <64 x i32> [[DOTUPTO13]], i32 [[DOTI1416]], i32 14 +; ALL-NEXT: [[DOTUPTO15:%.*]] = insertelement <64 x i32> [[DOTUPTO14]], i32 [[DOTI1517]], i32 15 +; ALL-NEXT: [[DOTUPTO16:%.*]] = insertelement <64 x i32> [[DOTUPTO15]], i32 [[DOTI1618]], i32 16 +; ALL-NEXT: [[DOTUPTO17:%.*]] = insertelement <64 x i32> [[DOTUPTO16]], i32 [[DOTI1719]], i32 17 +; ALL-NEXT: [[DOTUPTO18:%.*]] = insertelement <64 x i32> [[DOTUPTO17]], i32 [[DOTI1820]], i32 18 +; ALL-NEXT: [[DOTUPTO19:%.*]] = insertelement <64 x i32> [[DOTUPTO18]], i32 [[DOTI1921]], i32 19 +; ALL-NEXT: [[DOTUPTO20:%.*]] = insertelement <64 x i32> [[DOTUPTO19]], i32 [[DOTI2022]], i32 20 +; ALL-NEXT: [[DOTUPTO21:%.*]] = insertelement <64 x i32> [[DOTUPTO20]], i32 [[DOTI2123]], i32 21 +; ALL-NEXT: [[DOTUPTO22:%.*]] = insertelement <64 x i32> [[DOTUPTO21]], i32 [[DOTI2224]], i32 22 +; ALL-NEXT: [[DOTUPTO23:%.*]] = insertelement <64 x i32> [[DOTUPTO22]], i32 [[DOTI2326]], i32 23 +; ALL-NEXT: [[DOTUPTO24:%.*]] = insertelement <64 x i32> [[DOTUPTO23]], i32 [[DOTI2427]], i32 24 +; ALL-NEXT: [[DOTUPTO25:%.*]] = insertelement <64 x i32> [[DOTUPTO24]], i32 [[DOTI2528]], i32 25 +; ALL-NEXT: [[DOTUPTO26:%.*]] = insertelement <64 x i32> [[DOTUPTO25]], i32 [[DOTI2629]], i32 26 +; ALL-NEXT: [[DOTUPTO27:%.*]] = 
insertelement <64 x i32> [[DOTUPTO26]], i32 [[DOTI2730]], i32 27 +; ALL-NEXT: [[DOTUPTO28:%.*]] = insertelement <64 x i32> [[DOTUPTO27]], i32 [[DOTI2831]], i32 28 +; ALL-NEXT: [[DOTUPTO29:%.*]] = insertelement <64 x i32> [[DOTUPTO28]], i32 [[DOTI2932]], i32 29 +; ALL-NEXT: [[DOTUPTO30:%.*]] = insertelement <64 x i32> [[DOTUPTO29]], i32 [[DOTI3033]], i32 30 +; ALL-NEXT: [[DOTUPTO31:%.*]] = insertelement <64 x i32> [[DOTUPTO30]], i32 [[DOTI3134]], i32 31 +; ALL-NEXT: [[DOTUPTO32:%.*]] = insertelement <64 x i32> [[DOTUPTO31]], i32 [[DOTI3235]], i32 32 +; ALL-NEXT: [[DOTUPTO33:%.*]] = insertelement <64 x i32> [[DOTUPTO32]], i32 [[DOTI3336]], i32 33 +; ALL-NEXT: [[DOTUPTO34:%.*]] = insertelement <64 x i32> [[DOTUPTO33]], i32 [[DOTI3438]], i32 34 +; ALL-NEXT: [[DOTUPTO35:%.*]] = insertelement <64 x i32> [[DOTUPTO34]], i32 [[DOTI3539]], i32 35 +; ALL-NEXT: [[DOTUPTO36:%.*]] = insertelement <64 x i32> [[DOTUPTO35]], i32 [[DOTI3640]], i32 36 +; ALL-NEXT: [[DOTUPTO37:%.*]] = insertelement <64 x i32> [[DOTUPTO36]], i32 [[DOTI3741]], i32 37 +; ALL-NEXT: [[DOTUPTO38:%.*]] = insertelement <64 x i32> [[DOTUPTO37]], i32 [[DOTI3842]], i32 38 +; ALL-NEXT: [[DOTUPTO39:%.*]] = insertelement <64 x i32> [[DOTUPTO38]], i32 [[DOTI3943]], i32 39 +; ALL-NEXT: [[DOTUPTO40:%.*]] = insertelement <64 x i32> [[DOTUPTO39]], i32 [[DOTI4044]], i32 40 +; ALL-NEXT: [[DOTUPTO41:%.*]] = insertelement <64 x i32> [[DOTUPTO40]], i32 [[DOTI4145]], i32 41 +; ALL-NEXT: [[DOTUPTO42:%.*]] = insertelement <64 x i32> [[DOTUPTO41]], i32 [[DOTI4246]], i32 42 +; ALL-NEXT: [[DOTUPTO43:%.*]] = insertelement <64 x i32> [[DOTUPTO42]], i32 [[DOTI4347]], i32 43 +; ALL-NEXT: [[DOTUPTO44:%.*]] = insertelement <64 x i32> [[DOTUPTO43]], i32 [[DOTI4448]], i32 44 +; ALL-NEXT: [[DOTUPTO45:%.*]] = insertelement <64 x i32> [[DOTUPTO44]], i32 [[DOTI4550]], i32 45 +; ALL-NEXT: [[DOTUPTO46:%.*]] = insertelement <64 x i32> [[DOTUPTO45]], i32 [[DOTI4651]], i32 46 +; ALL-NEXT: [[DOTUPTO47:%.*]] = insertelement <64 x i32> [[DOTUPTO46]], 
i32 [[DOTI4752]], i32 47 +; ALL-NEXT: [[DOTUPTO48:%.*]] = insertelement <64 x i32> [[DOTUPTO47]], i32 [[DOTI4853]], i32 48 +; ALL-NEXT: [[DOTUPTO49:%.*]] = insertelement <64 x i32> [[DOTUPTO48]], i32 [[DOTI4954]], i32 49 +; ALL-NEXT: [[DOTUPTO50:%.*]] = insertelement <64 x i32> [[DOTUPTO49]], i32 [[DOTI5055]], i32 50 +; ALL-NEXT: [[DOTUPTO51:%.*]] = insertelement <64 x i32> [[DOTUPTO50]], i32 [[DOTI5156]], i32 51 +; ALL-NEXT: [[DOTUPTO52:%.*]] = insertelement <64 x i32> [[DOTUPTO51]], i32 [[DOTI5257]], i32 52 +; ALL-NEXT: [[DOTUPTO53:%.*]] = insertelement <64 x i32> [[DOTUPTO52]], i32 [[DOTI5358]], i32 53 +; ALL-NEXT: [[DOTUPTO54:%.*]] = insertelement <64 x i32> [[DOTUPTO53]], i32 [[DOTI5459]], i32 54 +; ALL-NEXT: [[DOTUPTO55:%.*]] = insertelement <64 x i32> [[DOTUPTO54]], i32 [[DOTI5560]], i32 55 +; ALL-NEXT: [[DOTUPTO56:%.*]] = insertelement <64 x i32> [[DOTUPTO55]], i32 [[DOTI5662]], i32 56 +; ALL-NEXT: [[DOTUPTO57:%.*]] = insertelement <64 x i32> [[DOTUPTO56]], i32 [[DOTI5763]], i32 57 +; ALL-NEXT: [[DOTUPTO58:%.*]] = insertelement <64 x i32> [[DOTUPTO57]], i32 [[DOTI5864]], i32 58 +; ALL-NEXT: [[DOTUPTO59:%.*]] = insertelement <64 x i32> [[DOTUPTO58]], i32 [[DOTI5965]], i32 59 +; ALL-NEXT: [[DOTUPTO60:%.*]] = insertelement <64 x i32> [[DOTUPTO59]], i32 [[DOTI6066]], i32 60 +; ALL-NEXT: [[DOTUPTO61:%.*]] = insertelement <64 x i32> [[DOTUPTO60]], i32 [[DOTI6167]], i32 61 +; ALL-NEXT: [[DOTUPTO62:%.*]] = insertelement <64 x i32> [[DOTUPTO61]], i32 [[DOTI6268]], i32 62 +; ALL-NEXT: [[TMP2:%.*]] = insertelement <64 x i32> [[DOTUPTO62]], i32 [[DOTI6369]], i32 63 +; ALL-NEXT: ret <64 x i32> [[TMP2]] +; + %1 = extractelement <16 x <64 x i32>*> %src, i32 3 + %2 = load <64 x i32>, <64 x i32>* %1, align 4 + ret <64 x i32> %2 +} + +; Test that constant extractelement returning vector pointer is properly scalarized without crash +define <2 x i32> @f3(<4 x <2 x i32>*> %src, <2 x i32> %value) { +; ALL-LABEL: @f3( +; ALL-NEXT: [[VALUE_I0:%.*]] = extractelement <2 x i32> 
[[VALUE:%.*]], i32 0 +; ALL-NEXT: [[VALUE_I1:%.*]] = extractelement <2 x i32> [[VALUE]], i32 1 +; ALL-NEXT: [[TMP1:%.*]] = extractelement <4 x <2 x i32>*> [[SRC:%.*]], i32 3 +; ALL-NEXT: [[DOTI0:%.*]] = bitcast <2 x i32>* [[TMP1]] to i32* +; ALL-NEXT: store i32 [[VALUE_I0]], i32* [[DOTI0]], align 8 +; ALL-NEXT: [[DOTI1:%.*]] = getelementptr i32, i32* [[DOTI0]], i32 1 +; ALL-NEXT: store i32 [[VALUE_I1]], i32* [[DOTI1]], align 4 +; ALL-NEXT: [[DOTI01:%.*]] = load i32, i32* [[DOTI0]], align 4 +; ALL-NEXT: [[DOTI12:%.*]] = load i32, i32* [[DOTI1]], align 4 +; ALL-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[DOTI01]], i32 0 +; ALL-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[DOTUPTO0]], i32 [[DOTI12]], i32 1 +; ALL-NEXT: ret <2 x i32> [[TMP2]] +; + %1 = extractelement <4 x <2 x i32>*> %src, i32 3 + store <2 x i32> %value, <2 x i32>* %1 + %2 = load <2 x i32>, <2 x i32>* %1, align 4 + ret <2 x i32> %2 +} + +declare void @external1(<2 x i32*> %v) +declare void @external2(<4 x <2 x i32*>*> %v) + +; Test that vector pointer returned by constant extractelement is properly handled +; in other instructions +define <2 x i32*>* @f4(<4 x <2 x i32*>*> %src, <2 x i32*> %value) { +; ALL-LABEL: @f4( +; ALL-NEXT: [[SRC_I1:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC:%.*]], i32 1 +; ALL-NEXT: [[SRC_I2:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC]], i32 2 +; ALL-NEXT: [[SRC_I3:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC]], i32 3 +; ALL-NEXT: [[VALUE_I0:%.*]] = extractelement <2 x i32*> [[VALUE:%.*]], i32 0 +; ALL-NEXT: [[VALUE_I1:%.*]] = extractelement <2 x i32*> [[VALUE]], i32 1 +; ALL-NEXT: [[TMP1:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC]], i32 3 +; ALL-NEXT: [[DOTI0:%.*]] = bitcast <2 x i32*>* [[TMP1]] to i32** +; ALL-NEXT: [[DOTI01:%.*]] = load i32*, i32** [[DOTI0]], align 4 +; ALL-NEXT: [[DOTI1:%.*]] = getelementptr i32*, i32** [[DOTI0]], i32 1 +; ALL-NEXT: [[DOTI12:%.*]] = load i32*, i32** [[DOTI1]], align 4 +; ALL-NEXT: [[TMP2:%.*]] = load i32, 
i32* [[DOTI12]], align 4 +; ALL-NEXT: store i32 [[TMP2]], i32* [[DOTI12]], align 4 +; ALL-NEXT: [[TMP3:%.*]] = getelementptr <2 x i32*>, <2 x i32*>* [[TMP1]], i32 2 +; ALL-NEXT: [[DOTI03:%.*]] = bitcast <2 x i32*>* [[TMP3]] to i32** +; ALL-NEXT: [[DOTI14:%.*]] = getelementptr i32*, i32** [[DOTI03]], i32 1 +; ALL-NEXT: [[TMP4:%.*]] = bitcast <2 x i32*>* [[TMP3]] to i32** +; ALL-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +; ALL-NEXT: store i32 [[TMP2]], i32* [[TMP5]], align 4 +; ALL-NEXT: store i32* [[VALUE_I0]], i32** [[DOTI0]], align 16 +; ALL-NEXT: store i32* [[VALUE_I1]], i32** [[DOTI1]], align 8 +; ALL-NEXT: [[TMP6:%.*]] = bitcast <2 x i32*>* [[TMP1]] to i32** +; ALL-NEXT: store i32* [[VALUE_I0]], i32** [[DOTI03]], align 16 +; ALL-NEXT: store i32* [[VALUE_I1]], i32** [[DOTI14]], align 8 +; ALL-NEXT: [[TMP7:%.*]] = load i32*, i32** [[TMP6]], align 8 +; ALL-NEXT: [[DOTUPTO05:%.*]] = insertelement <2 x i32*> poison, i32* [[DOTI01]], i32 0 +; ALL-NEXT: [[TMP8:%.*]] = insertelement <2 x i32*> [[DOTUPTO05]], i32* [[TMP7]], i32 1 +; ALL-NEXT: call void @external1(<2 x i32*> [[TMP8]]) +; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP2]], 0 +; ALL-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], <2 x i32*>* [[TMP1]], <2 x i32*>* undef +; ALL-NEXT: [[DOTUPTO06:%.*]] = insertelement <4 x <2 x i32*>*> poison, <2 x i32*>* [[TMP10]], i32 0 +; ALL-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x <2 x i32*>*> [[DOTUPTO06]], <2 x i32*>* [[SRC_I1]], i32 1 +; ALL-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x <2 x i32*>*> [[DOTUPTO1]], <2 x i32*>* [[SRC_I2]], i32 2 +; ALL-NEXT: [[TMP11:%.*]] = insertelement <4 x <2 x i32*>*> [[DOTUPTO2]], <2 x i32*>* [[SRC_I3]], i32 3 +; ALL-NEXT: call void @external2(<4 x <2 x i32*>*> [[TMP11]]) +; ALL-NEXT: ret <2 x i32*>* [[TMP1]] +; + %1 = extractelement <4 x <2 x i32*>*> %src, i32 3 + %2 = load <2 x i32*>, <2 x i32*>* %1, align 4 + %3 = extractelement <2 x i32*> %2, i32 1 + %4 = load i32, i32* %3, align 4 + store i32 %4, i32* %3 + %5 = 
getelementptr <2 x i32*>, <2 x i32*>* %1, i32 2 + %6 = bitcast <2 x i32*>* %5 to i32** + %7 = load i32*, i32** %6 + store i32 %4, i32* %7 + store <2 x i32*> %value, <2 x i32*>* %1 + %8 = bitcast <2 x i32*>* %1 to i32** + store <2 x i32*> %value, <2 x i32*>* %5 + %9 = insertelement <4 x <2 x i32*>*> %src, <2 x i32*>* %1, i32 1 + %10 = load i32*, i32** %8 + %11 = insertelement <2 x i32*> %2, i32* %10, i32 1 + call void @external1(<2 x i32*> %11) + %12 = icmp eq i32 %4, 0 + %13 = select i1 %12, <2 x i32*>* %1, <2 x i32*>* undef + %14 = insertelement <4 x <2 x i32*>*> %src, <2 x i32*>* %13, i32 0 + call void @external2(<4 x <2 x i32*>*> %14) + ret <2 x i32*>* %1 +} diff --git a/llvm/test/Transforms/Scalarizer/variable-extractelement.ll b/llvm/test/Transforms/Scalarizer/variable-extractelement.ll --- a/llvm/test/Transforms/Scalarizer/variable-extractelement.ll +++ b/llvm/test/Transforms/Scalarizer/variable-extractelement.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt %s -passes='function(scalarizer,dce)' -S | FileCheck --check-prefix=DEFAULT %s ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-variable-insert-extract=false -S | FileCheck --check-prefix=OFF %s ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-variable-insert-extract=true -S | FileCheck --check-prefix=DEFAULT %s @@ -19,8 +19,8 @@ ; DEFAULT-NEXT: [[RES_UPTO2:%.*]] = select i1 [[INDEX_IS_2]], i32 [[SRC_I2]], i32 [[RES_UPTO1]] ; DEFAULT-NEXT: [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3 ; DEFAULT-NEXT: [[SRC_I3:%.*]] = extractelement <4 x i32> [[SRC]], i32 3 -; DEFAULT-NEXT: [[RES:%.*]] = select i1 [[INDEX_IS_3]], i32 [[SRC_I3]], i32 [[RES_UPTO2]] -; DEFAULT-NEXT: ret i32 [[RES]] +; DEFAULT-NEXT: [[RES_UPTO3:%.*]] = select i1 [[INDEX_IS_3]], i32 [[SRC_I3]], i32 [[RES_UPTO2]] +; DEFAULT-NEXT: ret i32 [[RES_UPTO3]] ; ; OFF-LABEL: 
@f1( ; OFF-NEXT: [[RES:%.*]] = extractelement <4 x i32> [[SRC:%.*]], i32 [[INDEX:%.*]] @@ -48,8 +48,8 @@ ; DEFAULT-NEXT: [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2 ; DEFAULT-NEXT: [[VAL2_UPTO2:%.*]] = select i1 [[INDEX_IS_2]], i32 [[VAL1_I2]], i32 [[VAL2_UPTO1]] ; DEFAULT-NEXT: [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3 -; DEFAULT-NEXT: [[VAL2:%.*]] = select i1 [[INDEX_IS_3]], i32 [[VAL1_I3]], i32 [[VAL2_UPTO2]] -; DEFAULT-NEXT: ret i32 [[VAL2]] +; DEFAULT-NEXT: [[VAL2_UPTO3:%.*]] = select i1 [[INDEX_IS_3]], i32 [[VAL1_I3]], i32 [[VAL2_UPTO2]] +; DEFAULT-NEXT: ret i32 [[VAL2_UPTO3]] ; ; OFF-LABEL: @f2( ; OFF-NEXT: [[VAL0:%.*]] = load <4 x i32>, <4 x i32>* [[SRC:%.*]], align 16 @@ -73,3 +73,195 @@ %val2 = extractelement <4 x i32> %val1, i32 %index ret i32 %val2 } + +; PR54469; Test that variable extractelement returning vector pointer is properly scalarized without crash. +define <64 x i32> @f3(<16 x <64 x i32>*> %src, i32 %index) { +; DEFAULT-LABEL: @f3( +; DEFAULT-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0 +; DEFAULT-NEXT: [[SRC_I0:%.*]] = extractelement <16 x <64 x i32>*> [[SRC:%.*]], i32 0 +; DEFAULT-NEXT: [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], <64 x i32>* [[SRC_I0]], <64 x i32>* undef +; DEFAULT-NEXT: [[INDEX_IS_1:%.*]] = icmp eq i32 [[INDEX]], 1 +; DEFAULT-NEXT: [[SRC_I1:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 1 +; DEFAULT-NEXT: [[DOTUPTO1:%.*]] = select i1 [[INDEX_IS_1]], <64 x i32>* [[SRC_I1]], <64 x i32>* [[DOTUPTO0]] +; DEFAULT-NEXT: [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2 +; DEFAULT-NEXT: [[SRC_I2:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 2 +; DEFAULT-NEXT: [[DOTUPTO2:%.*]] = select i1 [[INDEX_IS_2]], <64 x i32>* [[SRC_I2]], <64 x i32>* [[DOTUPTO1]] +; DEFAULT-NEXT: [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3 +; DEFAULT-NEXT: [[SRC_I3:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 3 +; DEFAULT-NEXT: [[DOTUPTO3:%.*]] = select i1 [[INDEX_IS_3]], <64 x i32>* [[SRC_I3]], <64 x i32>* 
[[DOTUPTO2]] +; DEFAULT-NEXT: [[INDEX_IS_4:%.*]] = icmp eq i32 [[INDEX]], 4 +; DEFAULT-NEXT: [[SRC_I4:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 4 +; DEFAULT-NEXT: [[DOTUPTO4:%.*]] = select i1 [[INDEX_IS_4]], <64 x i32>* [[SRC_I4]], <64 x i32>* [[DOTUPTO3]] +; DEFAULT-NEXT: [[INDEX_IS_5:%.*]] = icmp eq i32 [[INDEX]], 5 +; DEFAULT-NEXT: [[SRC_I5:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 5 +; DEFAULT-NEXT: [[DOTUPTO5:%.*]] = select i1 [[INDEX_IS_5]], <64 x i32>* [[SRC_I5]], <64 x i32>* [[DOTUPTO4]] +; DEFAULT-NEXT: [[INDEX_IS_6:%.*]] = icmp eq i32 [[INDEX]], 6 +; DEFAULT-NEXT: [[SRC_I6:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 6 +; DEFAULT-NEXT: [[DOTUPTO6:%.*]] = select i1 [[INDEX_IS_6]], <64 x i32>* [[SRC_I6]], <64 x i32>* [[DOTUPTO5]] +; DEFAULT-NEXT: [[INDEX_IS_7:%.*]] = icmp eq i32 [[INDEX]], 7 +; DEFAULT-NEXT: [[SRC_I7:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 7 +; DEFAULT-NEXT: [[DOTUPTO7:%.*]] = select i1 [[INDEX_IS_7]], <64 x i32>* [[SRC_I7]], <64 x i32>* [[DOTUPTO6]] +; DEFAULT-NEXT: [[INDEX_IS_8:%.*]] = icmp eq i32 [[INDEX]], 8 +; DEFAULT-NEXT: [[SRC_I8:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 8 +; DEFAULT-NEXT: [[DOTUPTO8:%.*]] = select i1 [[INDEX_IS_8]], <64 x i32>* [[SRC_I8]], <64 x i32>* [[DOTUPTO7]] +; DEFAULT-NEXT: [[INDEX_IS_9:%.*]] = icmp eq i32 [[INDEX]], 9 +; DEFAULT-NEXT: [[SRC_I9:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 9 +; DEFAULT-NEXT: [[DOTUPTO9:%.*]] = select i1 [[INDEX_IS_9]], <64 x i32>* [[SRC_I9]], <64 x i32>* [[DOTUPTO8]] +; DEFAULT-NEXT: [[INDEX_IS_10:%.*]] = icmp eq i32 [[INDEX]], 10 +; DEFAULT-NEXT: [[SRC_I10:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 10 +; DEFAULT-NEXT: [[DOTUPTO10:%.*]] = select i1 [[INDEX_IS_10]], <64 x i32>* [[SRC_I10]], <64 x i32>* [[DOTUPTO9]] +; DEFAULT-NEXT: [[INDEX_IS_11:%.*]] = icmp eq i32 [[INDEX]], 11 +; DEFAULT-NEXT: [[SRC_I11:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 11 +; DEFAULT-NEXT: 
[[DOTUPTO11:%.*]] = select i1 [[INDEX_IS_11]], <64 x i32>* [[SRC_I11]], <64 x i32>* [[DOTUPTO10]] +; DEFAULT-NEXT: [[INDEX_IS_12:%.*]] = icmp eq i32 [[INDEX]], 12 +; DEFAULT-NEXT: [[SRC_I12:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 12 +; DEFAULT-NEXT: [[DOTUPTO12:%.*]] = select i1 [[INDEX_IS_12]], <64 x i32>* [[SRC_I12]], <64 x i32>* [[DOTUPTO11]] +; DEFAULT-NEXT: [[INDEX_IS_13:%.*]] = icmp eq i32 [[INDEX]], 13 +; DEFAULT-NEXT: [[SRC_I13:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 13 +; DEFAULT-NEXT: [[DOTUPTO13:%.*]] = select i1 [[INDEX_IS_13]], <64 x i32>* [[SRC_I13]], <64 x i32>* [[DOTUPTO12]] +; DEFAULT-NEXT: [[INDEX_IS_14:%.*]] = icmp eq i32 [[INDEX]], 14 +; DEFAULT-NEXT: [[SRC_I14:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 14 +; DEFAULT-NEXT: [[DOTUPTO14:%.*]] = select i1 [[INDEX_IS_14]], <64 x i32>* [[SRC_I14]], <64 x i32>* [[DOTUPTO13]] +; DEFAULT-NEXT: [[INDEX_IS_15:%.*]] = icmp eq i32 [[INDEX]], 15 +; DEFAULT-NEXT: [[SRC_I15:%.*]] = extractelement <16 x <64 x i32>*> [[SRC]], i32 15 +; DEFAULT-NEXT: [[DOTUPTO15:%.*]] = select i1 [[INDEX_IS_15]], <64 x i32>* [[SRC_I15]], <64 x i32>* [[DOTUPTO14]] +; DEFAULT-NEXT: [[TMP1:%.*]] = load <64 x i32>, <64 x i32>* [[DOTUPTO15]], align 4 +; DEFAULT-NEXT: ret <64 x i32> [[TMP1]] +; +; OFF-LABEL: @f3( +; OFF-NEXT: [[TMP1:%.*]] = extractelement <16 x <64 x i32>*> [[SRC:%.*]], i32 [[INDEX:%.*]] +; OFF-NEXT: [[TMP2:%.*]] = load <64 x i32>, <64 x i32>* [[TMP1]], align 4 +; OFF-NEXT: ret <64 x i32> [[TMP2]] +; + %1 = extractelement <16 x <64 x i32>*> %src, i32 %index + %2 = load <64 x i32>, <64 x i32>* %1, align 4 + ret <64 x i32> %2 +} + +; Test that variable extractelement returning vector pointer is properly scalarized without crash. 
+define <2 x i32> @f4(<4 x <2 x i32>*> %src, <2 x i32> %value, i32 %index) { +; DEFAULT-LABEL: @f4( +; DEFAULT-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0 +; DEFAULT-NEXT: [[SRC_I0:%.*]] = extractelement <4 x <2 x i32>*> [[SRC:%.*]], i32 0 +; DEFAULT-NEXT: [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], <2 x i32>* [[SRC_I0]], <2 x i32>* undef +; DEFAULT-NEXT: [[INDEX_IS_1:%.*]] = icmp eq i32 [[INDEX]], 1 +; DEFAULT-NEXT: [[SRC_I1:%.*]] = extractelement <4 x <2 x i32>*> [[SRC]], i32 1 +; DEFAULT-NEXT: [[DOTUPTO1:%.*]] = select i1 [[INDEX_IS_1]], <2 x i32>* [[SRC_I1]], <2 x i32>* [[DOTUPTO0]] +; DEFAULT-NEXT: [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2 +; DEFAULT-NEXT: [[SRC_I2:%.*]] = extractelement <4 x <2 x i32>*> [[SRC]], i32 2 +; DEFAULT-NEXT: [[DOTUPTO2:%.*]] = select i1 [[INDEX_IS_2]], <2 x i32>* [[SRC_I2]], <2 x i32>* [[DOTUPTO1]] +; DEFAULT-NEXT: [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3 +; DEFAULT-NEXT: [[SRC_I3:%.*]] = extractelement <4 x <2 x i32>*> [[SRC]], i32 3 +; DEFAULT-NEXT: [[DOTUPTO3:%.*]] = select i1 [[INDEX_IS_3]], <2 x i32>* [[SRC_I3]], <2 x i32>* [[DOTUPTO2]] +; DEFAULT-NEXT: store <2 x i32> [[VALUE:%.*]], <2 x i32>* [[DOTUPTO3]], align 8 +; DEFAULT-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[DOTUPTO3]], align 4 +; DEFAULT-NEXT: ret <2 x i32> [[TMP1]] +; +; OFF-LABEL: @f4( +; OFF-NEXT: [[TMP1:%.*]] = extractelement <4 x <2 x i32>*> [[SRC:%.*]], i32 [[INDEX:%.*]] +; OFF-NEXT: store <2 x i32> [[VALUE:%.*]], <2 x i32>* [[TMP1]], align 8 +; OFF-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4 +; OFF-NEXT: ret <2 x i32> [[TMP2]] +; + %1 = extractelement <4 x <2 x i32>*> %src, i32 %index + store <2 x i32> %value, <2 x i32>* %1 + %2 = load <2 x i32>, <2 x i32>* %1, align 4 + ret <2 x i32> %2 +} + +declare void @external1(<2 x i32*> %v) +declare void @external2(<4 x <2 x i32*>*> %v) + +; Test that vector pointer returned by variable extractelement is properly handled +; in other instructions. 
+define <2 x i32*>* @f5(<4 x <2 x i32*>*> %src, <2 x i32*> %value, i32 %index) {
+; DEFAULT-LABEL: @f5(
+; DEFAULT-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; DEFAULT-NEXT: [[SRC_I0:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC:%.*]], i32 0
+; DEFAULT-NEXT: [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], <2 x i32*>* [[SRC_I0]], <2 x i32*>* undef
+; DEFAULT-NEXT: [[INDEX_IS_1:%.*]] = icmp eq i32 [[INDEX]], 1
+; DEFAULT-NEXT: [[SRC_I1:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC]], i32 1
+; DEFAULT-NEXT: [[DOTUPTO1:%.*]] = select i1 [[INDEX_IS_1]], <2 x i32*>* [[SRC_I1]], <2 x i32*>* [[DOTUPTO0]]
+; DEFAULT-NEXT: [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2
+; DEFAULT-NEXT: [[SRC_I2:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC]], i32 2
+; DEFAULT-NEXT: [[DOTUPTO2:%.*]] = select i1 [[INDEX_IS_2]], <2 x i32*>* [[SRC_I2]], <2 x i32*>* [[DOTUPTO1]]
+; DEFAULT-NEXT: [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3
+; DEFAULT-NEXT: [[SRC_I3:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC]], i32 3
+; DEFAULT-NEXT: [[DOTUPTO3:%.*]] = select i1 [[INDEX_IS_3]], <2 x i32*>* [[SRC_I3]], <2 x i32*>* [[DOTUPTO2]]
+; DEFAULT-NEXT: [[TMP1:%.*]] = load <2 x i32*>, <2 x i32*>* [[DOTUPTO3]], align 4
+; DEFAULT-NEXT: [[DOTI0:%.*]] = extractelement <2 x i32*> [[TMP1]], i32 0
+; DEFAULT-NEXT: [[DOTI1:%.*]] = extractelement <2 x i32*> [[TMP1]], i32 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTI1]], align 4
+; DEFAULT-NEXT: store i32 [[TMP2]], i32* [[DOTI1]], align 4
+; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr <2 x i32*>, <2 x i32*>* [[DOTUPTO3]], i32 2
+; DEFAULT-NEXT: [[TMP4:%.*]] = bitcast <2 x i32*>* [[TMP3]] to i32**
+; DEFAULT-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+; DEFAULT-NEXT: store i32 [[TMP2]], i32* [[TMP5]], align 4
+; DEFAULT-NEXT: store <2 x i32*> [[VALUE:%.*]], <2 x i32*>* [[DOTUPTO3]], align 16
+; DEFAULT-NEXT: [[TMP6:%.*]] = bitcast <2 x i32*>* [[DOTUPTO3]] to i32**
+; DEFAULT-NEXT: store <2 x i32*> [[VALUE]], <2 x i32*>* [[TMP3]], align 16
+; DEFAULT-NEXT: [[TMP7:%.*]] = load i32*, i32** [[TMP6]], align 8
+; DEFAULT-NEXT: [[DOTUPTO01:%.*]] = insertelement <2 x i32*> poison, i32* [[DOTI0]], i32 0
+; DEFAULT-NEXT: [[TMP8:%.*]] = insertelement <2 x i32*> [[DOTUPTO01]], i32* [[TMP7]], i32 1
+; DEFAULT-NEXT: call void @external1(<2 x i32*> [[TMP8]])
+; DEFAULT-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP2]], 0
+; DEFAULT-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], <2 x i32*>* [[DOTUPTO3]], <2 x i32*>* undef
+; DEFAULT-NEXT: [[DOTUPTO03:%.*]] = insertelement <4 x <2 x i32*>*> poison, <2 x i32*>* [[TMP10]], i32 0
+; DEFAULT-NEXT: [[DOTUPTO14:%.*]] = insertelement <4 x <2 x i32*>*> [[DOTUPTO03]], <2 x i32*>* [[SRC_I1]], i32 1
+; DEFAULT-NEXT: [[DOTUPTO25:%.*]] = insertelement <4 x <2 x i32*>*> [[DOTUPTO14]], <2 x i32*>* [[SRC_I2]], i32 2
+; DEFAULT-NEXT: [[TMP11:%.*]] = insertelement <4 x <2 x i32*>*> [[DOTUPTO25]], <2 x i32*>* [[SRC_I3]], i32 3
+; DEFAULT-NEXT: call void @external2(<4 x <2 x i32*>*> [[TMP11]])
+; DEFAULT-NEXT: ret <2 x i32*>* [[DOTUPTO3]]
+;
+; OFF-LABEL: @f5(
+; OFF-NEXT: [[SRC_I1:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC:%.*]], i32 1
+; OFF-NEXT: [[SRC_I2:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC]], i32 2
+; OFF-NEXT: [[SRC_I3:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC]], i32 3
+; OFF-NEXT: [[TMP1:%.*]] = extractelement <4 x <2 x i32*>*> [[SRC]], i32 [[INDEX:%.*]]
+; OFF-NEXT: [[TMP2:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP1]], align 4
+; OFF-NEXT: [[DOTI0:%.*]] = extractelement <2 x i32*> [[TMP2]], i32 0
+; OFF-NEXT: [[DOTI1:%.*]] = extractelement <2 x i32*> [[TMP2]], i32 1
+; OFF-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTI1]], align 4
+; OFF-NEXT: store i32 [[TMP3]], i32* [[DOTI1]], align 4
+; OFF-NEXT: [[TMP4:%.*]] = getelementptr <2 x i32*>, <2 x i32*>* [[TMP1]], i32 2
+; OFF-NEXT: [[TMP5:%.*]] = bitcast <2 x i32*>* [[TMP4]] to i32**
+; OFF-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8
+; OFF-NEXT: store i32 [[TMP3]], i32* [[TMP6]], align 4
+; OFF-NEXT: store <2 x i32*> [[VALUE:%.*]], <2 x i32*>* [[TMP1]], align 16
+; OFF-NEXT: [[TMP7:%.*]] = bitcast <2 x i32*>* [[TMP1]] to i32**
+; OFF-NEXT: store <2 x i32*> [[VALUE]], <2 x i32*>* [[TMP4]], align 16
+; OFF-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8
+; OFF-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i32*> poison, i32* [[DOTI0]], i32 0
+; OFF-NEXT: [[TMP9:%.*]] = insertelement <2 x i32*> [[DOTUPTO0]], i32* [[TMP8]], i32 1
+; OFF-NEXT: call void @external1(<2 x i32*> [[TMP9]])
+; OFF-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP3]], 0
+; OFF-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], <2 x i32*>* [[TMP1]], <2 x i32*>* undef
+; OFF-NEXT: [[DOTUPTO01:%.*]] = insertelement <4 x <2 x i32*>*> poison, <2 x i32*>* [[TMP11]], i32 0
+; OFF-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x <2 x i32*>*> [[DOTUPTO01]], <2 x i32*>* [[SRC_I1]], i32 1
+; OFF-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x <2 x i32*>*> [[DOTUPTO1]], <2 x i32*>* [[SRC_I2]], i32 2
+; OFF-NEXT: [[TMP12:%.*]] = insertelement <4 x <2 x i32*>*> [[DOTUPTO2]], <2 x i32*>* [[SRC_I3]], i32 3
+; OFF-NEXT: call void @external2(<4 x <2 x i32*>*> [[TMP12]])
+; OFF-NEXT: ret <2 x i32*>* [[TMP1]]
+;
+  ; Variable-index extract producing a pointer to a vector of pointers; every
+  ; instruction group below uses %1 (or a value derived from it) differently.
+  %1 = extractelement <4 x <2 x i32*>*> %src, i32 %index
+  ; Use 1: pointer operand of a vector load, followed by a constant-index
+  ; extract from the loaded vector and a scalar load/store through that lane.
+  %2 = load <2 x i32*>, <2 x i32*>* %1, align 4
+  %3 = extractelement <2 x i32*> %2, i32 1
+  %4 = load i32, i32* %3, align 4
+  store i32 %4, i32* %3
+  ; Use 2: base of a getelementptr whose result is bitcast and dereferenced.
+  %5 = getelementptr <2 x i32*>, <2 x i32*>* %1, i32 2
+  %6 = bitcast <2 x i32*>* %5 to i32**
+  %7 = load i32*, i32** %6
+  store i32 %4, i32* %7
+  ; Use 3: pointer operand of vector stores (directly and via the GEP), plus a
+  ; direct bitcast of the extracted pointer.
+  store <2 x i32*> %value, <2 x i32*>* %1
+  %8 = bitcast <2 x i32*>* %1 to i32**
+  store <2 x i32*> %value, <2 x i32*>* %5
+  ; Use 4: scalar operand of an insertelement. %9 is not used by any later
+  ; instruction in this function.
+  %9 = insertelement <4 x <2 x i32*>*> %src, <2 x i32*>* %1, i32 1
+  %10 = load i32*, i32** %8
+  %11 = insertelement <2 x i32*> %2, i32* %10, i32 1
+  call void @external1(<2 x i32*> %11)
+  ; Use 5: select operand, whose result is inserted into %src and passed to a
+  ; call; finally %1 itself is the return value.
+  %12 = icmp eq i32 %4, 0
+  %13 = select i1 %12, <2 x i32*>* %1, <2 x i32*>* undef
+  %14 = insertelement <4 x <2 x i32*>*> %src, <2 x i32*>* %13, i32 0
+  call void @external2(<4 x <2 x i32*>*> %14)
+  ret <2 x i32*>* %1
+}