Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1183,6 +1183,18 @@ switch (I->getOpcode()) { default: break; + case Instruction::GetElementPtr: { + // Conservatively track the demanded elements back through any vector + // operands we may have. We know there must be at least one, or we + // wouldn't have a vector result to get here. Note that we intentionally + // merge the undef bits here since gepping with either an undef base or + // index results in undef. + for (unsigned i = 0; i < I->getNumOperands(); i++) + if (I->getOperand(i)->getType()->isVectorTy()) + simplifyAndSetOp(I, i, DemandedElts, UndefElts); + + break; + } case Instruction::InsertElement: { // If this is a variable index, we don't know which element it overwrites. // demand exactly the same input as we produce. Index: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll +++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -512,8 +512,7 @@ define i32* @gep_splat_base_w_s_idx(i32* %base) { ; CHECK-LABEL: @gep_splat_base_w_s_idx( -; CHECK-NEXT: [[BASEVEC1:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 0 -; CHECK-NEXT: [[BASEVEC2:%.*]] = shufflevector <2 x i32*> [[BASEVEC1]], <2 x i32*> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1 ; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 ; CHECK-NEXT: ret i32* [[EE]] @@ -528,9 +527,8 @@ define i32* @gep_splat_base_w_cv_idx(i32* %base) { ; CHECK-LABEL: @gep_splat_base_w_cv_idx( -; CHECK-NEXT: [[BASEVEC1:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 0 -; CHECK-NEXT: [[BASEVEC2:%.*]] = shufflevector <2 x i32*> [[BASEVEC1]], <2 x i32*> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> +; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> ; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 ; CHECK-NEXT: ret i32* [[EE]] ; @@ -543,8 +541,7 @@ define i32* @gep_splat_base_w_vidx(i32* %base, <2 x i64> %idxvec) { ; CHECK-LABEL: @gep_splat_base_w_vidx( -; CHECK-NEXT: [[BASEVEC1:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 0 -; CHECK-NEXT: [[BASEVEC2:%.*]] = shufflevector <2 x i32*> [[BASEVEC1]], <2 x i32*> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]] ; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 ; CHECK-NEXT: ret i32* [[EE]] @@ -561,7 +558,7 @@ define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) { ; CHECK-LABEL: @gep_cvbase_w_s_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> , i64 [[RAW_ADDR:%.*]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> , i64 [[RAW_ADDR:%.*]] ; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 ; CHECK-NEXT: ret i32* [[EE]] ; @@ -582,7 +579,7 @@ define i32* @gep_sbase_w_cv_idx(i32* %base) { ; CHECK-LABEL: @gep_sbase_w_cv_idx( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> ; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 ; CHECK-NEXT: ret i32* [[EE]] ; @@ -593,8 +590,7 @@ define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) { ; CHECK-LABEL: @gep_sbase_w_splat_idx( -; CHECK-NEXT: [[IDXVEC1:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 0 -; CHECK-NEXT: [[IDXVEC2:%.*]] = shufflevector <2 x i64> [[IDXVEC1]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]] ; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 ; CHECK-NEXT: ret i32* [[EE]] @@ -607,10 +603,8 @@ } define i32* @gep_splat_both(i32* %base, i64 %idx) { ; CHECK-LABEL: @gep_splat_both( -; CHECK-NEXT: [[BASEVEC1:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 0 -; CHECK-NEXT: [[BASEVEC2:%.*]] = shufflevector <2 x i32*> [[BASEVEC1]], <2 x i32*> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[IDXVEC1:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 0 -; CHECK-NEXT: [[IDXVEC2:%.*]] = shufflevector <2 x i64> [[IDXVEC1]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1 +; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> [[IDXVEC2]] ; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1 ; CHECK-NEXT: ret i32* [[EE]]