Index: lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -909,8 +909,12 @@
     }
 
     // If all indices are the same, just compare the base pointers.
-    if (IndicesTheSame)
+    // A GEP with vector indices can have scalar base pointers, in which case
+    // comparing the bases would yield i1 instead of I's vector-of-i1 type, so
+    // only fold when the result types agree.
+    Type *BaseType = GEPLHS->getOperand(0)->getType();
+    if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
       return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
 
     // If we're comparing GEPs with two base pointers that only differ in type
     // and both GEPs have only constant indices or just one use, then fold
Index: test/Transforms/InstCombine/pr38984.ll
===================================================================
--- test/Transforms/InstCombine/pr38984.ll
+++ test/Transforms/InstCombine/pr38984.ll
@@ -2,24 +2,40 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "p:16:16"
 
-@offsets = external dso_local global [4 x i16], align 1
+@a = external global [21 x i16], align 1
+@offsets = external global [4 x i16], align 1
 
-define void @PR38984() {
-; CHECK-LABEL: @PR38984(
+; The "same gep" optimization should work with vector icmp.
+define <4 x i1> @PR38984_1() {
+; CHECK-LABEL: @PR38984_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret void
+; CHECK-NEXT:    ret <4 x i1>
 ;
 entry:
   %0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 1
   %1 = insertelement <4 x i16> undef, i16 %0, i32 3
-  %2 = sub <4 x i16> zeroinitializer, %1
-  %3 = sext <4 x i16> %2 to <4 x i32>
-  %4 = getelementptr inbounds i64, i64* null, <4 x i32> %3
-  %5 = ptrtoint <4 x i64*> %4 to <4 x i32>
-  %6 = getelementptr inbounds i64, i64* null, <4 x i16> %2
-  %7 = ptrtoint <4 x i64*> %6 to <4 x i32>
-  %8 = icmp eq <4 x i32> %5, %7
-  %9 = select <4 x i1> %8, <4 x i16> zeroinitializer, <4 x i16>
-  %10 = sext <4 x i16> %9 to <4 x i32>
-  ret void
+  %2 = getelementptr i32, i32* null, <4 x i16> %1
+  %3 = getelementptr i32, i32* null, <4 x i16> %1
+  %4 = icmp eq <4 x i32*> %2, %3
+  ret <4 x i1> %4
+}
+
+; The "compare base pointers" optimization should not kick in for vector icmp.
+define <4 x i1> @PR38984_2() {
+; CHECK-LABEL: @PR38984_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i16, i16* getelementptr inbounds ([21 x i16], [21 x i16]* @a, i16 1, i16 0), <4 x i16> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i16, i16* null, <4 x i16> [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i16*> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i1> [[TMP4]]
+;
+entry:
+  %0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef)
+  %1 = insertelement <4 x i16> undef, i16 %0, i32 3
+  %2 = getelementptr i16, i16* getelementptr ([21 x i16], [21 x i16]* @a, i64 1, i32 0), <4 x i16> %1
+  %3 = getelementptr i16, i16* null, <4 x i16> %1
+  %4 = icmp eq <4 x i16*> %2, %3
+  ret <4 x i1> %4
 }