Teach the GEP handling in InstCombineSimplifyDemanded.cpp to leave vector
indices that step into a struct untouched when simplifying demanded vector
elements, and add regression tests (@gep_vector_of_pointers_to_struct and
@PR41624).

NOTE(review): this copy of the patch had lost its line structure and every
angle-bracketed token that starts with a letter (it looks like they were
stripped as HTML tags). isa<UndefValue> and cast<GetElementPtrInst> below were
restored on that assumption -- confirm against the original review. The vector
constants that followed "<2 x i64>" / "<2 x i32>" in the IR operands of the
test hunk are still missing and must be restored before that hunk is applied.
Leading whitespace of context lines is reconstructed and may need
`patch -l` (ignore whitespace) to apply.

Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1173,18 +1173,37 @@
     // operands we may have. We know there must be at least one, or we
     // wouldn't have a vector result to get here. Note that we intentionally
     // merge the undef bits here since gepping with either an undef base or
-    // index results in undef. 
-    for (unsigned i = 0; i < I->getNumOperands(); i++) {
-      if (isa<UndefValue>(I->getOperand(i))) {
+    // index results in undef.
+
+    // Simplifies operand Idx of the GEP. Returns true when that operand is
+    // undef, in which case UndefElts has been set to the full element mask.
+    auto simplifyGEPOperand = [&](unsigned Idx, bool IsIndexStruct) {
+      if (isa<UndefValue>(I->getOperand(Idx))) {
         // If the entire vector is undefined, just return this info.
         UndefElts = EltMask;
-        return nullptr;
+        return true;
       }
-      if (I->getOperand(i)->getType()->isVectorTy()) {
+
+      // Do not simplify a vector index that steps into a struct. Replacing a
+      // single lane of such an index with undef while preserving the other
+      // lanes breaks the guarantee that every lane of a vector-of-pointers
+      // GEP into a struct uses the same index.
+      if (!IsIndexStruct && I->getOperand(Idx)->getType()->isVectorTy()) {
         APInt UndefEltsOp(VWidth, 0);
-        simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
+        simplifyAndSetOp(I, Idx, DemandedElts, UndefEltsOp);
         UndefElts |= UndefEltsOp;
       }
+
+      return false;
+    };
+
+    if (simplifyGEPOperand(0, false))
+      return nullptr;
+
+    gep_type_iterator GTI = gep_type_begin(cast<GetElementPtrInst>(I));
+    for (unsigned Idx = 1; Idx < I->getNumOperands(); ++Idx, ++GTI) {
+      if (simplifyGEPOperand(Idx, GTI.isStruct()))
+        return nullptr;
     }
     break;
 
Index: test/Transforms/InstCombine/vec_demanded_elts.ll
===================================================================
--- test/Transforms/InstCombine/vec_demanded_elts.ll
+++ test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -638,3 +638,36 @@
   %ee = extractelement <2 x i32*> %gep, i32 1
   ret i32* %ee
 }
+
+%foo = type { float, i8 }
+
+define void @gep_vector_of_pointers_to_struct(float* %out, [2 x %foo]* %in) {
+; CHECK-LABEL: @gep_vector_of_pointers_to_struct(
+; CHECK-NEXT:    [[B:%.*]] = insertelement <2 x [2 x %foo]*> undef, [2 x %foo]* [[IN:%.*]], i32 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [2 x %foo], <2 x [2 x %foo]*> [[B]], <2 x i64> , <2 x i64> , <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x float*> [[GEP]] to <2 x i32*>
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32*> [[BC]], i64 1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[OUT:%.*]] to i32*
+; CHECK-NEXT:    store i32 [[LOAD1]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    ret void
+;
+  %a = insertelement <2 x [2 x %foo]*> undef, [2 x %foo]* %in, i32 0
+  %b = insertelement <2 x [2 x %foo]*> %a, [2 x %foo]* %in, i32 1
+  %gep = getelementptr [2 x %foo], <2 x [2 x %foo]*> %b, <2 x i32> zeroinitializer, <2 x i32> , <2 x i32> zeroinitializer
+  %extract = extractelement <2 x float*> %gep, i64 1
+  %load = load float, float* %extract, align 4
+  store float %load, float* %out
+  ret void
+}
+
+define i32* @PR41624(<2 x { i32, i32 }*> %a) {
+; CHECK-LABEL: @PR41624(
+; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x { i32, i32 }*> [[A:%.*]], <2 x i64> , <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i32*> [[W]], i1 false
+; CHECK-NEXT:    ret i32* [[R]]
+;
+  %w = getelementptr { i32, i32 }, <2 x { i32, i32 }*> %a, <2 x i64> , <2 x i32>
+  %r = extractelement <2 x i32*> %w, i1 0
+  ret i32* %r
+}