[X86] Fold X86ISD::Wrapper base pointers into gather/scatter addresses.

selectVectorAddr now fills in an X86ISelAddressMode (index register and scale
taken from the gather/scatter node, base matched by the new matchVectorAddress
helper) and emits the operands through getAddressOperands, instead of setting
Base/Scale/Index/Disp/Segment by hand. As the updated test shows, a global
array base is then used directly as the displacement
("glob_array(,%zmm0,4)") rather than first being moved into a register.

NOTE(review): this patch text was recovered from a copy whose line breaks were
collapsed and whose isa/cast/dyn_cast type arguments were missing. Line
structure was rebuilt to agree with the hunk headers; the type arguments
(MaskedGatherScatterSDNode, X86MaskedGatherSDNode, ConstantSDNode, MemSDNode)
and the exact indentation are presumed from the LLVM tree -- confirm against
the target revision before applying.

Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -194,6 +194,7 @@
     bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
     bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
     bool matchAddress(SDValue N, X86ISelAddressMode &AM);
+    bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
     bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
     bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                  unsigned Depth);
@@ -1502,22 +1503,34 @@
   return false;
 }
 
+/// Helper for selectVectorAddr. Handles things that can be folded into a
+/// gather scatter address. The index register and scale should have already
+/// been handled.
+bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
+  // TODO: Support other operations.
+  switch (N.getOpcode()) {
+  case X86ISD::Wrapper:
+    if (!matchWrapper(N, AM))
+      return false;
+    break;
+  }
+
+  return matchAddressBase(N, AM);
+}
+
 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                        SDValue &Scale, SDValue &Index,
                                        SDValue &Disp, SDValue &Segment) {
-  unsigned ScalarSize;
+  X86ISelAddressMode AM;
   if (auto Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent)) {
-    Base = Mgs->getBasePtr();
-    Index = Mgs->getIndex();
-    ScalarSize = Mgs->getValue().getScalarValueSizeInBits();
+    AM.IndexReg = Mgs->getIndex();
+    AM.Scale = Mgs->getValue().getScalarValueSizeInBits() / 8;
   } else {
     auto X86Gather = cast<X86MaskedGatherSDNode>(Parent);
-    Base = X86Gather->getBasePtr();
-    Index = X86Gather->getIndex();
-    ScalarSize = X86Gather->getValue().getScalarValueSizeInBits();
+    AM.IndexReg = X86Gather->getIndex();
+    AM.Scale = X86Gather->getValue().getScalarValueSizeInBits() / 8;
   }
 
-  X86ISelAddressMode AM;
   unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
   // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
   if (AddrSpace == 256)
@@ -1527,21 +1540,23 @@
   if (AddrSpace == 258)
     AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
 
-  SDLoc DL(N);
-  Scale = getI8Imm(ScalarSize/8, DL);
-
   // If Base is 0, the whole address is in index and the Scale is 1
-  if (isa<ConstantSDNode>(Base)) {
-    assert(cast<ConstantSDNode>(Base)->isNullValue() &&
+  if (isa<ConstantSDNode>(N)) {
+    assert(cast<ConstantSDNode>(N)->isNullValue() &&
            "Unexpected base in gather/scatter");
-    Scale = getI8Imm(1, DL);
-    Base = CurDAG->getRegister(0, MVT::i32);
+    AM.Scale = 1;
+  } else {
+    if (matchVectorAddress(N, AM))
+      return false;
   }
-  if (AM.Segment.getNode())
-    Segment = AM.Segment;
-  else
-    Segment = CurDAG->getRegister(0, MVT::i32);
-  Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+  MVT VT = N.getSimpleValueType();
+  if (AM.BaseType == X86ISelAddressMode::RegBase) {
+    if (!AM.Base_Reg.getNode())
+      AM.Base_Reg = CurDAG->getRegister(0, VT);
+  }
+
+  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
   return true;
 }
 
Index: test/CodeGen/X86/masked_gather_scatter.ll
===================================================================
--- test/CodeGen/X86/masked_gather_scatter.ll
+++ test/CodeGen/X86/masked_gather_scatter.ll
@@ -2330,33 +2330,29 @@
 define <8 x i32> @test_global_array(<8 x i64> %indxs) {
 ; KNL_64-LABEL: test_global_array:
 ; KNL_64:       # BB#0:
-; KNL_64-NEXT:    movl $glob_array, %eax
 ; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
-; KNL_64-NEXT:    vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
+; KNL_64-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
 ; KNL_64-NEXT:    vmovdqa %ymm1, %ymm0
 ; KNL_64-NEXT:    retq
 ;
 ; KNL_32-LABEL: test_global_array:
 ; KNL_32:       # BB#0:
-; KNL_32-NEXT:    movl $glob_array, %eax
 ; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
-; KNL_32-NEXT:    vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; KNL_32-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
 ; KNL_32-NEXT:    vmovdqa %ymm1, %ymm0
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test_global_array:
 ; SKX:       # BB#0:
-; SKX-NEXT:    movl $glob_array, %eax
 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
-; SKX-NEXT:    vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
+; SKX-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
 ; SKX-NEXT:    vmovdqa %ymm1, %ymm0
 ; SKX-NEXT:    retq
 ;
 ; SKX_32-LABEL: test_global_array:
 ; SKX_32:       # BB#0:
-; SKX_32-NEXT:    movl $glob_array, %eax
 ; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
-; SKX_32-NEXT:    vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; SKX_32-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
 ; SKX_32-NEXT:    vmovdqa %ymm1, %ymm0
 ; SKX_32-NEXT:    retl
   %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <8 x i64> %indxs