diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -430,6 +430,52 @@
       // be the same value, extract from the pre-inserted value instead.
       if (isa<Constant>(IE->getOperand(2)) && IndexC)
         return replaceOperand(EI, 0, IE->getOperand(0));
+    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+      // Fold an extractelement of a single-use vector GEP at a constant lane
+      // into a scalar GEP built from that lane of the GEP's operands.
+      ElementCount EC = cast<VectorType>(GEP->getType())->getElementCount();
+      // Compare as APInt so an index constant wider than 64 bits is rejected
+      // rather than tripping the assertion in getZExtValue().
+      if (IndexC && IndexC->getValue().ult(EC.getKnownMinValue()) &&
+          GEP->hasOneUse()) {
+        // Find out why we have a vector result - these are a few examples:
+        //  1. We have a scalar pointer and a vector of indices, or
+        //  2. We have a vector of pointers and a scalar index, or
+        //  3. We have a vector of pointers and a vector of indices, etc.
+        // Here we only consider combining when there is exactly one vector
+        // operand, since the optimization is less obviously a win when it
+        // needs more than one extractelement.
+
+        unsigned VectorOps =
+            llvm::count_if(GEP->operands(), [](const Value *V) {
+              return isa<VectorType>(V->getType());
+            });
+        if (VectorOps > 1)
+          return nullptr;
+        assert(VectorOps == 1 && "Expected exactly one vector GEP operand!");
+
+        // Scalarize the base pointer if it is the vector operand.
+        Value *NewPtr = GEP->getPointerOperand();
+        if (isa<VectorType>(NewPtr->getType()))
+          NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
+
+        // Scalarize whichever index is the vector operand; scalar indices are
+        // reused unchanged.
+        SmallVector<Value *> NewOps;
+        for (Value *Op : GEP->indices()) {
+          if (isa<VectorType>(Op->getType()))
+            Op = Builder.CreateExtractElement(Op, IndexC);
+          NewOps.push_back(Op);
+        }
+
+        // Take the element type from the GEP itself instead of from the
+        // pointer operand's pointee type.
+        GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+            GEP->getSourceElementType(), NewPtr, NewOps);
+        NewGEP->setIsInBounds(GEP->isInBounds());
+        return NewGEP;
+      }
+      return nullptr;
     } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
       // If this is extracting an element from a shufflevector, figure out where
       // it came from and extract from the appropriate input element instead.
diff --git a/llvm/test/Transforms/InstCombine/gep-vector-indices.ll b/llvm/test/Transforms/InstCombine/gep-vector-indices.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/gep-vector-indices.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -instcombine %s -S | FileCheck %s + +define i32* @vector_splat_indices_v2i64_ext0(i32* %a) { +; CHECK-LABEL: @vector_splat_indices_v2i64_ext0( +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 4 +; CHECK-NEXT: ret i32* [[RES]] +; + %gep = getelementptr i32, i32* %a, <2 x i64> + %res = extractelement <2 x i32*> %gep, i32 0 + ret i32* %res +} + +define i32* @vector_splat_indices_nxv2i64_ext0(i32* %a) { +; CHECK-LABEL: @vector_splat_indices_nxv2i64_ext0( +; CHECK-NEXT: [[RES:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 extractelement ( shufflevector ( insertelement ( poison, i64 4, i32 0), poison, zeroinitializer), i32 0) +; CHECK-NEXT: ret i32* [[RES]] +; + %tmp = insertelement poison, i64 4, i32 0 + %splatof4 = shufflevector %tmp, poison, zeroinitializer + %gep = getelementptr inbounds i32, i32* %a, %splatof4 + %res = extractelement %gep, i32 0 + ret i32* %res +} + +define i32* @vector_indices_v2i64_ext0(i32* %a, <2 x i64> %indices) { +; CHECK-LABEL: @vector_indices_v2i64_ext0( +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[INDICES:%.*]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: ret i32* [[RES]] +; + %gep = getelementptr i32, i32* %a, <2 x i64> %indices + %res = extractelement <2 x i32*> %gep, i32 0 + ret i32* %res +} + +define i32* @vector_indices_nxv1i64_ext0(i32* %a, %indices) { +; CHECK-LABEL: @vector_indices_nxv1i64_ext0( +; CHECK-NEXT: [[TMP0:%.*]] = extractelement [[INDICES:%.*]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: ret i32* [[RES]] +; + %gep = getelementptr i32, i32* %a, %indices + 
%res = extractelement %gep, i32 0 + ret i32* %res +} + + +define i32* @vector_splat_ptrs_v2i64_ext0(i32* %a, i64 %index) { +; CHECK-LABEL: @vector_splat_ptrs_v2i64_ext0( +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX:%.*]] +; CHECK-NEXT: ret i32* [[RES]] +; + %tmp = insertelement <2 x i32*> poison, i32* %a, i32 0 + %splatofa = shufflevector <2 x i32*> %tmp, <2 x i32*> poison, <2 x i32> zeroinitializer + %gep = getelementptr i32, <2 x i32*> %splatofa, i64 %index + %res = extractelement <2 x i32*> %gep, i32 0 + ret i32* %res +} + + +define i32* @vector_splat_ptrs_nxv2i64_ext0(i32* %a, i64 %index) { +; CHECK-LABEL: @vector_splat_ptrs_nxv2i64_ext0( +; CHECK-NEXT: [[TMP:%.*]] = insertelement poison, i32* [[A:%.*]], i32 0 +; CHECK-NEXT: [[SPLATOFA:%.*]] = shufflevector [[TMP]], poison, zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = extractelement [[SPLATOFA]], i32 0 +; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[TMP0]], i64 [[INDEX:%.*]] +; CHECK-NEXT: ret i32* [[RES]] +; + %tmp = insertelement poison, i32* %a, i32 0 + %splatofa = shufflevector %tmp, poison, zeroinitializer + %gep = getelementptr i32, %splatofa, i64 %index + %res = extractelement %gep, i32 0 + ret i32* %res +} + + +define float* @vector_struct1_splat_indices_v4i64_ext1({float, float}* %a) { +; CHECK-LABEL: @vector_struct1_splat_indices_v4i64_ext1( +; CHECK-NEXT: [[RES:%.*]] = getelementptr { float, float }, { float, float }* [[A:%.*]], i64 4, i32 0 +; CHECK-NEXT: ret float* [[RES]] +; + %gep = getelementptr {float, float}, {float, float}* %a, <4 x i32> , i32 0 + %res = extractelement <4 x float*> %gep, i32 1 + ret float* %res +} + + +define float* @vector_struct2_splat_indices_v4i64_ext1({float, [8 x float]}* %a) { +; CHECK-LABEL: @vector_struct2_splat_indices_v4i64_ext1( +; CHECK-NEXT: [[RES:%.*]] = getelementptr { float, [8 x float] }, { float, [8 x float] }* [[A:%.*]], i64 2, i32 1, i64 4 +; CHECK-NEXT: ret float* [[RES]] +; + %gep = getelementptr {float, [8 x 
float]}, {float, [8 x float]}* %a, i32 2, i32 1, <4 x i32> + %res = extractelement <4 x float*> %gep, i32 1 + ret float* %res +} + + +; Negative tests + +define i32* @vector_indices_nxv2i64_ext3(i32* %a, %indices) { +; CHECK-LABEL: @vector_indices_nxv2i64_ext3( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], [[INDICES:%.*]] +; CHECK-NEXT: [[RES:%.*]] = extractelement [[GEP]], i32 3 +; CHECK-NEXT: ret i32* [[RES]] +; + %gep = getelementptr i32, i32* %a, %indices + %res = extractelement %gep, i32 3 + ret i32* %res +} + +define i32* @vector_indices_nxv2i64_extN(i32* %a, %indices, i32 %N) { +; CHECK-LABEL: @vector_indices_nxv2i64_extN( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], [[INDICES:%.*]] +; CHECK-NEXT: [[RES:%.*]] = extractelement [[GEP]], i32 [[N:%.*]] +; CHECK-NEXT: ret i32* [[RES]] +; + %gep = getelementptr i32, i32* %a, %indices + %res = extractelement %gep, i32 %N + ret i32* %res +} + +define void @vector_indices_nxv2i64_mulitple_use(i32* %a, %indices, i32** %b, i32** %c) { +; CHECK-LABEL: @vector_indices_nxv2i64_mulitple_use( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], [[INDICES:%.*]] +; CHECK-NEXT: [[LANE0:%.*]] = extractelement [[GEP]], i32 0 +; CHECK-NEXT: [[LANE1:%.*]] = extractelement [[GEP]], i32 1 +; CHECK-NEXT: store i32* [[LANE0]], i32** [[B:%.*]], align 8 +; CHECK-NEXT: store i32* [[LANE1]], i32** [[C:%.*]], align 8 +; CHECK-NEXT: ret void +; + %gep = getelementptr i32, i32* %a, %indices + %lane0 = extractelement %gep, i32 0 + %lane1 = extractelement %gep, i32 1 + store i32* %lane0, i32** %b, align 8 + store i32* %lane1, i32** %c, align 8 + ret void +} + +define i32* @vector_ptrs_and_indices_ext0( %a, %indices) { +; CHECK-LABEL: @vector_ptrs_and_indices_ext0( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, [[A:%.*]], [[INDICES:%.*]] +; CHECK-NEXT: [[RES:%.*]] = extractelement [[GEP]], i32 0 +; CHECK-NEXT: ret i32* [[RES]] +; + %gep = getelementptr i32, %a, %indices + %res = extractelement 
%gep, i32 0 + ret i32* %res +} diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll @@ -499,13 +499,10 @@ ret <3 x float> %r } -;; TODO: getelementptr tests below show missing simplifications for -;; vector demanded elements on vector geps. - define i32* @gep_vbase_w_s_idx(<2 x i32*> %base, i64 %index) { ; CHECK-LABEL: @gep_vbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASE:%.*]], i64 [[INDEX:%.*]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32*> [[BASE:%.*]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[TMP1]], i64 %index ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> %base, i64 %index @@ -515,9 +512,7 @@ define i32* @gep_splat_base_w_s_idx(i32* %base) { ; CHECK-LABEL: @gep_splat_base_w_s_idx( -; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> poison, i32* [[BASE:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1 -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %basevec1 = insertelement <2 x i32*> poison, i32* %base, i32 0 @@ -561,8 +556,7 @@ define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) { ; CHECK-LABEL: @gep_cvbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> , i64 [[RAW_ADDR:%.*]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* @GLOBAL, i64 [[RAW_ADDR:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> , i64 %raw_addr @@ -582,8 +576,7 @@ define i32* @gep_sbase_w_cv_idx(i32* 
%base) { ; CHECK-LABEL: @gep_sbase_w_cv_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, i32* %base, <2 x i64> @@ -593,9 +586,7 @@ define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) { ; CHECK-LABEL: @gep_sbase_w_splat_idx( -; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[IDX:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %idxvec1 = insertelement <2 x i64> poison, i64 %idx, i32 0 diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -499,13 +499,10 @@ ret <3 x float> %r } -;; TODO: getelementptr tests below show missing simplifications for -;; vector demanded elements on vector geps. 
- define i32* @gep_vbase_w_s_idx(<2 x i32*> %base, i64 %index) { ; CHECK-LABEL: @gep_vbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASE:%.*]], i64 [[INDEX:%.*]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32*> [[BASE:%.*]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[TMP1]], i64 %index ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> %base, i64 %index @@ -515,9 +512,7 @@ define i32* @gep_splat_base_w_s_idx(i32* %base) { ; CHECK-LABEL: @gep_splat_base_w_s_idx( -; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1 -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %basevec1 = insertelement <2 x i32*> undef, i32* %base, i32 0 @@ -561,8 +556,7 @@ define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) { ; CHECK-LABEL: @gep_cvbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> , i64 [[RAW_ADDR:%.*]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* @GLOBAL, i64 [[RAW_ADDR:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, <2 x i32*> , i64 %raw_addr @@ -582,8 +576,7 @@ define i32* @gep_sbase_w_cv_idx(i32* %base) { ; CHECK-LABEL: @gep_sbase_w_cv_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1 ; CHECK-NEXT: ret i32* [[EE]] ; %gep = getelementptr i32, i32* %base, <2 x i64> @@ -593,9 +586,7 @@ define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) { ; CHECK-LABEL: @gep_sbase_w_splat_idx( -; CHECK-NEXT: 
[[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]] -; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 +; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[IDX:%.*]] ; CHECK-NEXT: ret i32* [[EE]] ; %idxvec1 = insertelement <2 x i64> undef, i64 %idx, i32 0 diff --git a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll --- a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll @@ -3,9 +3,9 @@ define <4 x i16*> @PR41270([4 x i16]* %x) { ; CHECK-LABEL: @PR41270( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x [4 x i16]*> undef, [4 x i16]* [[X:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i16], <4 x [4 x i16]*> [[TMP1]], i64 0, i64 3 -; CHECK-NEXT: ret <4 x i16*> [[TMP2]] +; CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* [[X:%.*]], i64 0, i64 3 +; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i16*> poison, i16* [[T3]], i32 0 +; CHECK-NEXT: ret <4 x i16*> [[INS2]] ; %ins = insertelement <4 x [4 x i16]*> poison, [4 x i16]* %x, i32 0 %splat = shufflevector <4 x [4 x i16]*> %ins, <4 x [4 x i16]*> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll --- a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll +++ b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll @@ -3,9 +3,9 @@ define <4 x i16*> @PR41270([4 x i16]* %x) { ; CHECK-LABEL: @PR41270( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x [4 x i16]*> undef, [4 x i16]* [[X:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i16], <4 x [4 x i16]*> [[TMP1]], i64 0, i64 3 -; CHECK-NEXT: ret <4 x i16*> [[TMP2]] +; CHECK-NEXT: 
[[T3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* [[X:%.*]], i64 0, i64 3 +; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i16*> undef, i16* [[T3]], i32 0 +; CHECK-NEXT: ret <4 x i16*> [[INS2]] ; %ins = insertelement <4 x [4 x i16]*> undef, [4 x i16]* %x, i32 0 %splat = shufflevector <4 x [4 x i16]*> %ins, <4 x [4 x i16]*> undef, <4 x i32> zeroinitializer