Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -430,13 +430,55 @@ // be the same value, extract from the pre-inserted value instead. if (isa(IE->getOperand(2)) && IndexC) return replaceOperand(EI, 0, IE->getOperand(0)); + } else if (auto *GEP = dyn_cast(I)) { + auto *VecType = dyn_cast(GEP->getType()); + if (VecType && IndexC && GEP->hasOneUse()) { + uint64_t IdxVal = IndexC->getZExtValue(); + ElementCount EC = VecType->getElementCount(); + if (IdxVal < EC.getKnownMinValue() && GEP->getNumOperands() == 2) { + // Find out why we have a vector result - there are possibilities: + // 1. We have a scalar pointer and a vector of indices, or + // 2. We have a vector of pointers and a scalar index, or + // 3. We have a vector of pointers and a vector of indices. + // Here we only consider combining for the first two cases, since for + // the last case the optimization is less obviously a win due to + // needing two extractelements instead of one. + Type *IdxType = GEP->getOperand(1)->getType(); + Type *NewElType = nullptr; + Value *NewPtr = nullptr, *NewIdx = nullptr; + if (VecType->getElementType() == GEP->getPointerOperandType()) { + assert(isa(IdxType) && + EC == cast(IdxType)->getElementCount()); + // Extract the vector index and create a new GEP with a scalar + // result. + NewElType = cast(GEP->getPointerOperandType()) + ->getElementType(); + NewPtr = GEP->getPointerOperand(); + NewIdx = Builder.CreateExtractElement(GEP->getOperand(1), IndexC); + } else if (VecType == GEP->getPointerOperandType() && + IdxType->isIntegerTy()) { + // Extract the pointer and create a new GEP with a scalar result. + NewElType = + cast(VecType->getElementType())->getElementType(); + NewPtr = + Builder.CreateExtractElement(GEP->getPointerOperand(), IndexC); + NewIdx = GEP->getOperand(1); + } + if (NewPtr) { + GetElementPtrInst *NewGEP = + GetElementPtrInst::Create(NewElType, NewPtr, {NewIdx}); + NewGEP->setIsInBounds(GEP->isInBounds()); + return NewGEP; + } + } + } + return nullptr; } else if (auto *SVI = dyn_cast(I)) { // If this is extracting an element from a shufflevector, figure out where // it came from and extract from the appropriate input element instead. // Restrict the following transformation to fixed-length vector. - if (isa(SVI->getType()) && isa(Index)) { - int SrcIdx = - SVI->getMaskValue(cast(Index)->getZExtValue()); + if (isa(SVI->getType()) && IndexC) { + int SrcIdx = SVI->getMaskValue(IndexC->getZExtValue()); Value *Src; unsigned LHSWidth = cast(SVI->getOperand(0)->getType()) ->getNumElements(); Index: llvm/test/Transforms/InstCombine/gep-vector-indices.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/gep-vector-indices.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -instcombine %s -S | FileCheck %s + +define i32* @vector_splat_indices_v2i64_ext0(i32* %a) { +; CHECK-LABEL: @vector_splat_indices_v2i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 4 +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %tmp = insertelement <2 x i64> poison, i64 4, i32 0 + %splatof4 = shufflevector <2 x i64> %tmp, <2 x i64> poison, <2 x i32> zeroinitializer + %gep = getelementptr i32, i32* %a, <2 x i64> %splatof4 + %res = extractelement <2 x i32*> %gep, i32 0 + ret i32* %res +} + +define i32* @vector_splat_indices_nxv2i64_ext0(i32* %a) { +; CHECK-LABEL: @vector_splat_indices_nxv2i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RES:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 extractelement ( shufflevector ( insertelement ( poison, i64 4, i32 0), poison, zeroinitializer), i32 0) +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %tmp = insertelement poison, i64 4, i32 0 + %splatof4 = shufflevector %tmp, poison, zeroinitializer + %gep = getelementptr inbounds i32, i32* %a, %splatof4 + %res = extractelement %gep, i32 0 + ret i32* %res +} + +define i32* @vector_indices_v2i64_ext0(i32* %a, <2 x i64> %indices) { +; CHECK-LABEL: @vector_indices_v2i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[INDICES:%.*]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %gep = getelementptr i32, i32* %a, <2 x i64> %indices + %res = extractelement <2 x i32*> %gep, i32 0 + ret i32* %res +} + +define i32* @vector_indices_nxv1i64_ext0(i32* %a, %indices) { +; CHECK-LABEL: @vector_indices_nxv1i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement [[INDICES:%.*]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %gep = getelementptr i32, i32* %a, %indices + %res = extractelement %gep, i32 0 + ret i32* %res +} + + +define i32* @vector_splat_ptrs_v2i64_ext0(i32* %a, i64 %index) { +; CHECK-LABEL: @vector_splat_ptrs_v2i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX:%.*]] +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %tmp = insertelement <2 x i32*> poison, i32* %a, i32 0 + %splatofa = shufflevector <2 x i32*> %tmp, <2 x i32*> poison, <2 x i32> zeroinitializer + %gep = getelementptr i32, <2 x i32*> %splatofa, i64 %index + %res = extractelement <2 x i32*> %gep, i32 0 + ret i32* %res +} + + +define i32* @vector_splat_ptrs_nxv2i64_ext0(i32* %a, i64 %index) { +; CHECK-LABEL: @vector_splat_ptrs_nxv2i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP:%.*]] = insertelement poison, i32* [[A:%.*]], i32 0 +; CHECK-NEXT: [[SPLATOFA:%.*]] = shufflevector [[TMP]], poison, zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = extractelement [[SPLATOFA]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[TMP0]], i64 [[INDEX:%.*]] +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %tmp = insertelement poison, i32* %a, i32 0 + %splatofa = shufflevector %tmp, poison, zeroinitializer + %gep = getelementptr i32, %splatofa, i64 %index + %res = extractelement %gep, i32 0 + ret i32* %res +} + + +; Negative tests + +define i32* @vector_indices_nxv2i64_ext3(i32* %a, %indices) { +; CHECK-LABEL: @vector_indices_nxv2i64_ext3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], [[INDICES:%.*]] +; CHECK-NEXT: [[RES:%.*]] = extractelement [[GEP]], i32 3 +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %gep = getelementptr i32, i32* %a, %indices + %res = extractelement %gep, i32 3 + ret i32* %res +} Index: llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll +++ llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll @@ -21,7 +21,7 @@ define void @get_image() nounwind { ; CHECK-LABEL: @get_image( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @fgetc(i8* null) [[ATTR0:#.*]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @fgetc(i8* null) #[[ATTR0:[0-9]+]] ; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] @@ -504,8 +504,8 @@ define i32* @gep_vbase_w_s_idx(<2 x i32*> %base) { ; CHECK-LABEL: @gep_vbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASE:%.*]], i64 1 -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32*> [[BASE:%.*]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[TMP1]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> %base, i64 1 @@ -515,9 +515,7 @@ define i32* @gep_splat_base_w_s_idx(i32* %base) { ; CHECK-LABEL: @gep_splat_base_w_s_idx( -; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> poison, i32* [[BASE:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1 -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %basevec1 = insertelement <2 x i32*> poison, i32* %base, i32 0 @@ -561,8 +559,7 @@ define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) { ; CHECK-LABEL: @gep_cvbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> , i64 [[RAW_ADDR:%.*]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* @GLOBAL, i64 [[RAW_ADDR:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> , i64 %raw_addr @@ -582,8 +579,7 @@ define i32* @gep_sbase_w_cv_idx(i32* %base) { ; CHECK-LABEL: @gep_sbase_w_cv_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, i32* %base, <2 x i64> @@ -593,9 +589,7 @@ define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) { ; CHECK-LABEL: @gep_sbase_w_splat_idx( -; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[IDX:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %idxvec1 = insertelement <2 x i64> poison, i64 %idx, i32 0 Index: llvm/test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -21,7 +21,7 @@ define void @get_image() nounwind { ; CHECK-LABEL: @get_image( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @fgetc(i8* null) [[ATTR0:#.*]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @fgetc(i8* null) #[[ATTR0:[0-9]+]] ; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] @@ -504,8 +504,8 @@ define i32* @gep_vbase_w_s_idx(<2 x i32*> %base) { ; CHECK-LABEL: @gep_vbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASE:%.*]], i64 1 -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32*> [[BASE:%.*]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[TMP1]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> %base, i64 1 @@ -515,9 +515,7 @@ define i32* @gep_splat_base_w_s_idx(i32* %base) { ; CHECK-LABEL: @gep_splat_base_w_s_idx( -; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1 -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %basevec1 = insertelement <2 x i32*> undef, i32* %base, i32 0 @@ -561,8 +559,7 @@ define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) { ; CHECK-LABEL: @gep_cvbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> , i64 [[RAW_ADDR:%.*]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* @GLOBAL, i64 [[RAW_ADDR:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> , i64 %raw_addr @@ -582,8 +579,7 @@ define i32* @gep_sbase_w_cv_idx(i32* %base) { ; CHECK-LABEL: @gep_sbase_w_cv_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, i32* %base, <2 x i64> @@ -593,9 +589,7 @@ define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) { ; CHECK-LABEL: @gep_sbase_w_splat_idx( -; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[IDX:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %idxvec1 = insertelement <2 x i64> undef, i64 %idx, i32 0