Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -430,6 +430,52 @@ // be the same value, extract from the pre-inserted value instead. if (isa(IE->getOperand(2)) && IndexC) return replaceOperand(EI, 0, IE->getOperand(0)); + } else if (auto *GEP = dyn_cast(I)) { + if (IndexC && GEP->hasOneUse() && GEP->getNumOperands() == 2) { + uint64_t IdxVal = IndexC->getZExtValue(); + auto *VecType = cast(GEP->getType()); + ElementCount EC = VecType->getElementCount(); + if (IdxVal < EC.getKnownMinValue()) { + // Find out why we have a vector result - there are three + // possibilities: + // 1. We have a scalar pointer and a vector of indices, or + // 2. We have a vector of pointers and a scalar index, or + // 3. We have a vector of pointers and a vector of indices. + // Here we only consider combining for the first two cases, since for + // the last case the optimization is less obviously a win due to + // needing two extractelements instead of one. + Type *IdxType = GEP->getOperand(1)->getType(); + Type *NewElType = nullptr; + Value *NewPtr = nullptr, *NewIdx = nullptr; + if (VecType->getElementType() == GEP->getPointerOperandType()) { + assert(isa(IdxType) && + EC == cast(IdxType)->getElementCount() && + "Expected 2nd GEP operand to be a vector of indices with " + "same element count as the result"); + // Extract the vector index and create a new GEP with a scalar + // result. + NewElType = cast(GEP->getPointerOperandType()) + ->getElementType(); + NewPtr = GEP->getPointerOperand(); + NewIdx = Builder.CreateExtractElement(GEP->getOperand(1), IndexC); + } else if (VecType == GEP->getPointerOperandType() && + IdxType->isIntegerTy()) { + // Extract the pointer and create a new GEP with a scalar result. 
+ NewElType = + cast(VecType->getElementType())->getElementType(); + NewPtr = + Builder.CreateExtractElement(GEP->getPointerOperand(), IndexC); + NewIdx = GEP->getOperand(1); + } + if (NewPtr) { + GetElementPtrInst *NewGEP = + GetElementPtrInst::Create(NewElType, NewPtr, {NewIdx}); + NewGEP->setIsInBounds(GEP->isInBounds()); + return NewGEP; + } + } + } + return nullptr; } else if (auto *SVI = dyn_cast(I)) { // If this is extracting an element from a shufflevector, figure out where // it came from and extract from the appropriate input element instead. Index: llvm/test/Transforms/InstCombine/gep-vector-indices.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/gep-vector-indices.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -instcombine %s -S | FileCheck %s + +define i32* @vector_splat_indices_v2i64_ext0(i32* %a) { +; CHECK-LABEL: @vector_splat_indices_v2i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 4 +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %tmp = insertelement <2 x i64> poison, i64 4, i32 0 + %splatof4 = shufflevector <2 x i64> %tmp, <2 x i64> poison, <2 x i32> zeroinitializer + %gep = getelementptr i32, i32* %a, <2 x i64> %splatof4 + %res = extractelement <2 x i32*> %gep, i32 0 + ret i32* %res +} + +define i32* @vector_splat_indices_nxv2i64_ext0(i32* %a) { +; CHECK-LABEL: @vector_splat_indices_nxv2i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RES:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 extractelement ( shufflevector ( insertelement ( poison, i64 4, i32 0), poison, zeroinitializer), i32 0) +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %tmp = insertelement poison, i64 4, i32 0 + %splatof4 = shufflevector %tmp, poison, zeroinitializer + %gep = getelementptr inbounds i32, i32* %a, %splatof4 + %res = extractelement %gep, i32 0 + ret i32* %res +} + +define i32* 
@vector_indices_v2i64_ext0(i32* %a, <2 x i64> %indices) { +; CHECK-LABEL: @vector_indices_v2i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[INDICES:%.*]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %gep = getelementptr i32, i32* %a, <2 x i64> %indices + %res = extractelement <2 x i32*> %gep, i32 0 + ret i32* %res +} + +define i32* @vector_indices_nxv1i64_ext0(i32* %a, %indices) { +; CHECK-LABEL: @vector_indices_nxv1i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement [[INDICES:%.*]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %gep = getelementptr i32, i32* %a, %indices + %res = extractelement %gep, i32 0 + ret i32* %res +} + + +define i32* @vector_splat_ptrs_v2i64_ext0(i32* %a, i64 %index) { +; CHECK-LABEL: @vector_splat_ptrs_v2i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX:%.*]] +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %tmp = insertelement <2 x i32*> poison, i32* %a, i32 0 + %splatofa = shufflevector <2 x i32*> %tmp, <2 x i32*> poison, <2 x i32> zeroinitializer + %gep = getelementptr i32, <2 x i32*> %splatofa, i64 %index + %res = extractelement <2 x i32*> %gep, i32 0 + ret i32* %res +} + + +define i32* @vector_splat_ptrs_nxv2i64_ext0(i32* %a, i64 %index) { +; CHECK-LABEL: @vector_splat_ptrs_nxv2i64_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP:%.*]] = insertelement poison, i32* [[A:%.*]], i32 0 +; CHECK-NEXT: [[SPLATOFA:%.*]] = shufflevector [[TMP]], poison, zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = extractelement [[SPLATOFA]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[TMP0]], i64 [[INDEX:%.*]] +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %tmp = insertelement poison, i32* %a, i32 0 + %splatofa = shufflevector %tmp, poison, zeroinitializer + 
%gep = getelementptr i32, %splatofa, i64 %index + %res = extractelement %gep, i32 0 + ret i32* %res +} + + +; Negative tests + +define i32* @vector_indices_nxv2i64_ext3(i32* %a, %indices) { +; CHECK-LABEL: @vector_indices_nxv2i64_ext3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], [[INDICES:%.*]] +; CHECK-NEXT: [[RES:%.*]] = extractelement [[GEP]], i32 3 +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %gep = getelementptr i32, i32* %a, %indices + %res = extractelement %gep, i32 3 + ret i32* %res +} + +define i32* @vector_indices_nxv2i64_extN(i32* %a, %indices, i32 %N) { +; CHECK-LABEL: @vector_indices_nxv2i64_extN( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], [[INDICES:%.*]] +; CHECK-NEXT: [[RES:%.*]] = extractelement [[GEP]], i32 [[N:%.*]] +; CHECK-NEXT: ret i32* [[RES]] +; +entry: + %gep = getelementptr i32, i32* %a, %indices + %res = extractelement %gep, i32 %N + ret i32* %res +} + +define void @vector_indices_nxv2i64_mulitple_use(i32* %a, %indices, i32** %b, i32** %c) { +; CHECK-LABEL: @vector_indices_nxv2i64_mulitple_use( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], [[INDICES:%.*]] +; CHECK-NEXT: [[LANE0:%.*]] = extractelement [[GEP]], i32 0 +; CHECK-NEXT: [[LANE1:%.*]] = extractelement [[GEP]], i32 1 +; CHECK-NEXT: store i32* [[LANE0]], i32** [[B:%.*]], align 8 +; CHECK-NEXT: store i32* [[LANE1]], i32** [[C:%.*]], align 8 +; CHECK-NEXT: ret void +; +entry: + %gep = getelementptr i32, i32* %a, %indices + %lane0 = extractelement %gep, i32 0 + %lane1 = extractelement %gep, i32 1 + store i32* %lane0, i32** %b, align 8 + store i32* %lane1, i32** %c, align 8 + ret void +} + +define i32* @vector_ptrs_and_indices_ext0( %a, %indices) { +; CHECK-LABEL: @vector_ptrs_and_indices_ext0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, [[A:%.*]], [[INDICES:%.*]] +; CHECK-NEXT: [[RES:%.*]] = extractelement [[GEP]], i32 0 +; CHECK-NEXT: 
ret i32* [[RES]] +; +entry: + %gep = getelementptr i32, %a, %indices + %res = extractelement %gep, i32 0 + ret i32* %res +} Index: llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll +++ llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll @@ -499,13 +499,10 @@ ret <3 x float> %r } -;; TODO: getelementptr tests below show missing simplifications for -;; vector demanded elements on vector geps. - define i32* @gep_vbase_w_s_idx(<2 x i32*> %base, i64 %index) { ; CHECK-LABEL: @gep_vbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASE:%.*]], i64 [[INDEX:%.*]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32*> [[BASE:%.*]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[TMP1]], i64 [[INDEX:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> %base, i64 %index @@ -515,9 +512,7 @@ define i32* @gep_splat_base_w_s_idx(i32* %base) { ; CHECK-LABEL: @gep_splat_base_w_s_idx( -; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> poison, i32* [[BASE:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1 -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %basevec1 = insertelement <2 x i32*> poison, i32* %base, i32 0 @@ -561,8 +556,7 @@ define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) { ; CHECK-LABEL: @gep_cvbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> , i64 [[RAW_ADDR:%.*]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* @GLOBAL, i64 [[RAW_ADDR:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x
i32*> , i64 %raw_addr @@ -582,8 +576,7 @@ define i32* @gep_sbase_w_cv_idx(i32* %base) { ; CHECK-LABEL: @gep_sbase_w_cv_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, i32* %base, <2 x i64> @@ -593,9 +586,7 @@ define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) { ; CHECK-LABEL: @gep_sbase_w_splat_idx( -; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[IDX:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %idxvec1 = insertelement <2 x i64> poison, i64 %idx, i32 0 Index: llvm/test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -499,13 +499,10 @@ ret <3 x float> %r } -;; TODO: getelementptr tests below show missing simplifications for -;; vector demanded elements on vector geps. 
- define i32* @gep_vbase_w_s_idx(<2 x i32*> %base, i64 %index) { ; CHECK-LABEL: @gep_vbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASE:%.*]], i64 [[INDEX:%.*]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32*> [[BASE:%.*]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[TMP1]], i64 [[INDEX:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> %base, i64 %index @@ -515,9 +512,7 @@ define i32* @gep_splat_base_w_s_idx(i32* %base) { ; CHECK-LABEL: @gep_splat_base_w_s_idx( -; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1 -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %basevec1 = insertelement <2 x i32*> undef, i32* %base, i32 0 @@ -561,8 +556,7 @@ define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) { ; CHECK-LABEL: @gep_cvbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> , i64 [[RAW_ADDR:%.*]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* @GLOBAL, i64 [[RAW_ADDR:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> , i64 %raw_addr @@ -582,8 +576,7 @@ define i32* @gep_sbase_w_cv_idx(i32* %base) { ; CHECK-LABEL: @gep_sbase_w_cv_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, i32* %base, <2 x i64> @@ -593,9 +586,7 @@ define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) { ; CHECK-LABEL: @gep_sbase_w_splat_idx( -; CHECK-NEXT:
[[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[IDX:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %idxvec1 = insertelement <2 x i64> undef, i64 %idx, i32 0