Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3871,7 +3871,7 @@
 
   assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
   const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
-  if (!GEP || GEP->getNumOperands() > 2)
+  if (!GEP)
     return false;
 
   const Value *GEPPtr = GEP->getPointerOperand();
@@ -3880,7 +3880,16 @@
   else if (!(Ptr = getSplatValue(GEPPtr)))
     return false;
 
-  Value *IndexVal = GEP->getOperand(1);
+  unsigned FinalIndex = GEP->getNumOperands() - 1;
+  Value *IndexVal = GEP->getOperand(FinalIndex);
+
+  // Ensure all the other indices are 0. An index that is not a constant
+  // integer cannot be proven zero, so bail out in that case as well.
+  for (unsigned i = 1; i < FinalIndex; ++i) {
+    auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (!C || !C->isZero())
+      return false;
+  }
 
   // The operands of the GEP may be defined in another basic block.
   // In this case we'll not find nodes for the operands.
Index: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll +++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll @@ -2330,46 +2330,34 @@ define <8 x i32> @test_global_array(<8 x i64> %indxs) { ; KNL_64-LABEL: test_global_array: ; KNL_64: # BB#0: -; KNL_64-NEXT: vpsllq $2, %zmm0, %zmm0 ; KNL_64-NEXT: movl $glob_array, %eax -; KNL_64-NEXT: vpbroadcastq %rax, %zmm1 -; KNL_64-NEXT: vpaddq %zmm0, %zmm1, %zmm1 ; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} +; KNL_64-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1} +; KNL_64-NEXT: vmovdqa %ymm1, %ymm0 ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test_global_array: ; KNL_32: # BB#0: -; KNL_32-NEXT: vpmovqd %zmm0, %ymm0 -; KNL_32-NEXT: vpslld $2, %ymm0, %ymm0 ; KNL_32-NEXT: movl $glob_array, %eax -; KNL_32-NEXT: vmovd %eax, %xmm1 -; KNL_32-NEXT: vpbroadcastd %xmm1, %ymm1 -; KNL_32-NEXT: vpaddd %ymm0, %ymm1, %ymm0 -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1 ; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} +; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1} +; KNL_32-NEXT: vmovdqa %ymm1, %ymm0 ; KNL_32-NEXT: retl ; ; SKX-LABEL: test_global_array: ; SKX: # BB#0: -; SKX-NEXT: vpsllq $2, %zmm0, %zmm0 ; SKX-NEXT: movl $glob_array, %eax -; SKX-NEXT: vpbroadcastq %rax, %zmm1 -; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm1 ; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} +; SKX-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1} +; SKX-NEXT: vmovdqa %ymm1, %ymm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test_global_array: ; SKX_32: # BB#0: ; SKX_32-NEXT: movl $glob_array, %eax -; SKX_32-NEXT: vpbroadcastd %eax, %ymm1 -; SKX_32-NEXT: vpmovqd %zmm0, %ymm0 -; SKX_32-NEXT: vpslld $2, %ymm0, %ymm0 -; SKX_32-NEXT: vpaddd %ymm0, %ymm1, %ymm1 ; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1} +; SKX_32-NEXT: 
vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1} +; SKX_32-NEXT: vmovdqa %ymm1, %ymm0 ; SKX_32-NEXT: retl %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <8 x i64> %indxs %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)