Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1680,9 +1680,42 @@ // invariant: this breaks the dependence between GEPs and allows LICM // to hoist the invariant part out of the loop. if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) { - Src->setOperand(1, GO1); - GEP.setOperand(1, SO1); - return &GEP; + // We have to be careful here. + // We have something like: + // %src = getelementptr <ty>, <ty>* %base, <ty> %idx + // %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2 + // If we just swap idx & idx2 then we could inadvertently + // change %src from a vector to a scalar, or vice versa. + // Cases: + // 1) %base a scalar & idx a scalar & idx2 a vector + // => Swapping idx & idx2 turns %src into a vector type. + // 2) %base a scalar & idx a vector & idx2 a scalar + // => Swapping idx & idx2 turns %src into a scalar type + // 3) %base, %idx, and %idx2 are scalars + // => %src & %gep are scalars + // => swapping idx & idx2 is safe + // 4) %base a vector + // => %src is a vector + // => swapping idx & idx2 is safe. 
+ auto *SO0 = Src->getOperand(0); + auto *SO0Ty = SO0->getType(); + if (!isa(GEPType) || // case 3 + isa(SO0Ty)) { // case 4 + Src->setOperand(1, GO1); + GEP.setOperand(1, SO1); + return &GEP; + } else { + // Case 1 or 2 + // -- have to recreate %src & %gep + // put NewSrc at same location as %src + Builder.SetInsertPoint(cast(PtrOp)); + auto *NewSrc = cast( + Builder.CreateGEP(SO0, GO1, Src->getName())); + NewSrc->setIsInBounds(Src->isInBounds()); + auto *NewGEP = GetElementPtrInst::Create(nullptr, NewSrc, {SO1}); + NewGEP->setIsInBounds(GEP.isInBounds()); + return NewGEP; + } } } } Index: test/Transforms/InstCombine/gep-combine-loop-invariant.ll =================================================================== --- test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -88,3 +88,100 @@ %cont.0 = phi i32 [ 1, %entry ], [ 0, %if.then ], [ 0, %land.lhs.true ], [ 1, %do.body ] ret i32 %cont.0 } + +declare void @blackhole(<2 x i8*>) + +define void @PR37005(i8* %base, i8** %in) { +; CHECK-LABEL: @PR37005( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef +; CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds i8*, i8** [[E2]], <2 x i64> +; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x i8**> [[E4]] to <2 x i64> +; CHECK-NEXT: [[LR1:%.*]] = lshr <2 x i64> [[PI1]], +; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[LR1]], +; CHECK-NEXT: [[E51:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 80 +; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, i8* [[E51]], <2 x i64> [[SL1]] +; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]]) +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %e1 = getelementptr inbounds i8*, i8** %in, i64 undef + %e2 = getelementptr inbounds i8*, i8** %e1, i64 6 + %bc1 = bitcast i8** %e2 to <2 x i8*>* + %e3 = getelementptr inbounds <2 x i8*>, <2 x i8*>* 
%bc1, i64 0, i64 0 + %e4 = getelementptr inbounds i8*, i8** %e3, <2 x i64> + %pi1 = ptrtoint <2 x i8**> %e4 to <2 x i64> + %lr1 = lshr <2 x i64> %pi1, + %sl1 = shl nuw nsw <2 x i64> %lr1, + %e5 = getelementptr inbounds i8, i8* %base, <2 x i64> %sl1 + %e6 = getelementptr inbounds i8, <2 x i8*> %e5, i64 80 + call void @blackhole(<2 x i8*> %e6) + br label %loop +} + +define void @PR37005_2(i8* %base, i8** %in) { +; CHECK-LABEL: @PR37005_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef +; CHECK-NEXT: [[PI1:%.*]] = ptrtoint i8** [[E2]] to i64 +; CHECK-NEXT: [[LR1:%.*]] = lshr i64 [[PI1]], 21 +; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw i64 [[LR1]], 7 +; CHECK-NEXT: [[E51:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], <2 x i64> +; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x i8*> [[E51]], i64 [[SL1]] +; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]]) +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %e1 = getelementptr inbounds i8*, i8** %in, i64 undef + %e2 = getelementptr inbounds i8*, i8** %e1, i64 6 + %pi1 = ptrtoint i8** %e2 to i64 + %lr1 = lshr i64 %pi1, 21 + %sl1 = shl nuw nsw i64 %lr1, 7 + %e5 = getelementptr inbounds i8, i8* %base, i64 %sl1 + %e6 = getelementptr inbounds i8, i8* %e5, <2 x i64> + call void @blackhole(<2 x i8*> %e6) + br label %loop +} + +define void @PR37005_3(<2 x i8*> %base, i8** %in) { +; CHECK-LABEL: @PR37005_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef +; CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds i8*, i8** [[E2]], <2 x i64> +; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x i8**> [[E4]] to <2 x i64> +; CHECK-NEXT: [[LR1:%.*]] = lshr <2 x i64> [[PI1]], +; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[LR1]], +; CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds i8, <2 x i8*> 
[[BASE:%.*]], i64 80 +; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x i8*> [[E5]], <2 x i64> [[SL1]] +; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]]) +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %e1 = getelementptr inbounds i8*, i8** %in, i64 undef + %e2 = getelementptr inbounds i8*, i8** %e1, i64 6 + %bc1 = bitcast i8** %e2 to <2 x i8*>* + %e3 = getelementptr inbounds <2 x i8*>, <2 x i8*>* %bc1, i64 0, i64 0 + %e4 = getelementptr inbounds i8*, i8** %e3, <2 x i64> + %pi1 = ptrtoint <2 x i8**> %e4 to <2 x i64> + %lr1 = lshr <2 x i64> %pi1, + %sl1 = shl nuw nsw <2 x i64> %lr1, + %e5 = getelementptr inbounds i8, <2 x i8*> %base, <2 x i64> %sl1 + %e6 = getelementptr inbounds i8, <2 x i8*> %e5, i64 80 + call void @blackhole(<2 x i8*> %e6) + br label %loop +}