Index: include/llvm/IR/Instructions.h =================================================================== --- include/llvm/IR/Instructions.h +++ include/llvm/IR/Instructions.h @@ -837,10 +837,12 @@ if (!PointeeType) PointeeType = cast(Ptr->getType()->getScalarType())->getElementType(); + else if (PointeeType->isVectorTy() == Ptr->getType()->isVectorTy()) + assert(PointeeType == + cast(Ptr->getType()->getScalarType())->getElementType()->getScalarType()); else - assert( - PointeeType == - cast(Ptr->getType()->getScalarType())->getElementType()); + assert(PointeeType->getScalarType() == + cast(Ptr->getType()->getScalarType())->getElementType()->getScalarType()); return new (Values) GetElementPtrInst(PointeeType, Ptr, IdxList, Values, NameStr, InsertBefore); } @@ -852,10 +854,12 @@ if (!PointeeType) PointeeType = cast(Ptr->getType()->getScalarType())->getElementType(); + else if (PointeeType->isVectorTy() == Ptr->getType()->isVectorTy()) + assert(PointeeType == + cast(Ptr->getType()->getScalarType())->getElementType()->getScalarType()); else - assert( - PointeeType == - cast(Ptr->getType()->getScalarType())->getElementType()); + assert(PointeeType->getScalarType() == + cast(Ptr->getType()->getScalarType())->getElementType()->getScalarType()); return new (Values) GetElementPtrInst(PointeeType, Ptr, IdxList, Values, NameStr, InsertAtEnd); } @@ -971,7 +975,12 @@ unsigned NumElem = cast(Ptr->getType())->getNumElements(); return VectorType::get(PtrTy, NumElem); } - + else + for (Value *Index : IdxList) + if (Index->getType()->isVectorTy()) { + unsigned NumElts = cast(Index->getType())->getNumElements(); + return VectorType::get(PtrTy, NumElts); + } // Scalar GEP return PtrTy; } Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -5541,6 +5541,11 @@ SmallVector Indices; bool AteExtraComma = false; + // GEP returns a vector of pointers if at least one of parameters is a vector. + // All vector parameters should have the same vector width. + unsigned GepWidth = BaseType->isVectorTy() ? + cast(BaseType)->getNumElements() : 0; + while (EatIfPresent(lltok::comma)) { if (Lex.getKind() == lltok::MetadataVar) { AteExtraComma = true; @@ -5549,14 +5554,13 @@ if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; if (!Val->getType()->getScalarType()->isIntegerTy()) return Error(EltLoc, "getelementptr index must be an integer"); - if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy()) - return Error(EltLoc, "getelementptr index type missmatch"); + if (Val->getType()->isVectorTy()) { unsigned ValNumEl = cast(Val->getType())->getNumElements(); - unsigned PtrNumEl = cast(Ptr->getType())->getNumElements(); - if (ValNumEl != PtrNumEl) + if (GepWidth && GepWidth != ValNumEl) return Error(EltLoc, "getelementptr vector index has a wrong number of elements"); + GepWidth = ValNumEl; } Indices.push_back(Val); } Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2760,6 +2760,16 @@ SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); + // Normalize Vector GEP - all scalar operands should be converted to the + // splat vector + unsigned VectorWidth = + I.getType()->isVectorTy()? cast(I.getType())->getVectorNumElements() : 0; + + if (VectorWidth && !N.getValueType().isVector()) { + MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth); + SmallVector Ops(VectorWidth, N); + N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { const Value *Idx = *OI; @@ -2780,11 +2790,18 @@ APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); // If this is a constant subscript, handle it quickly. - if (const auto *CI = dyn_cast(Idx)) { + const auto *CI = dyn_cast(Idx); + if (!CI && isa(Idx) && + cast(Idx)->getSplatValue()) + CI = cast(cast(Idx)->getSplatValue()); + + if (CI) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + SDValue OffsVal = VectorWidth ? + DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : + DAG.getConstant(Offs, dl, PtrTy); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -2792,6 +2809,11 @@ // N = N + Idx * ElementSize; SDValue IdxN = getValue(Idx); + if (!IdxN.getValueType().isVector() && VectorWidth) { + MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); + SmallVector Ops(VectorWidth, IdxN); + IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); @@ -3062,48 +3084,63 @@ static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { + SelectionDAG& DAG = SDB->DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type"); GetElementPtrInst *Gep = dyn_cast(Ptr); if (!Gep || Gep->getNumOperands() > 2) return false; - ShuffleVectorInst *ShuffleInst = - dyn_cast(Gep->getPointerOperand()); - if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || - cast(ShuffleInst->getOperand(0))->getOpcode() != - Instruction::InsertElement) + + Value *GepBasePtr = Gep->getPointerOperand(); + Value *IndexVal = Gep->getOperand(1); + // Gep instruction may be located in another basic block, check if we can find + // SDValue for all operands + if (!SDB->findValue(GepBasePtr) || !SDB->findValue(IndexVal)) return false; - Ptr = cast(ShuffleInst->getOperand(0))->getOperand(1); - - SelectionDAG& DAG = SDB->DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // Check is the Ptr is inside current basic block - // If not, look for the shuffle instruction - if (SDB->findValue(Ptr)) + // If GEP base is scalar - this is a single base + if (!GepBasePtr->getType()->isVectorTy()) { + Ptr = GepBasePtr; Base = SDB->getValue(Ptr); - else if (SDB->findValue(ShuffleInst)) { - SDValue ShuffleNode = SDB->getValue(ShuffleInst); - SDLoc sdl = ShuffleNode; - Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, - ShuffleNode.getValueType().getScalarType(), ShuffleNode, - DAG.getConstant(0, sdl, TLI.getVectorIdxTy())); - SDB->setValue(Ptr, Base); } - else - return false; - - Value *IndexVal = Gep->getOperand(1); - if (SDB->findValue(IndexVal)) { - Index = SDB->getValue(IndexVal); + else { + ShuffleVectorInst *ShuffleInst = dyn_cast(GepBasePtr); + if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || + cast(ShuffleInst->getOperand(0))->getOpcode() != + Instruction::InsertElement) + return false; - if (SExtInst* Sext = dyn_cast(IndexVal)) { - IndexVal = Sext->getOperand(0); - if (SDB->findValue(IndexVal)) - Index = SDB->getValue(IndexVal); + Ptr = cast(ShuffleInst->getOperand(0))->getOperand(1); + // Check is the Ptr is inside current basic block + // If not, look for the shuffle instruction + if (SDB->findValue(Ptr)) + Base = SDB->getValue(Ptr); + else { + SDValue ShuffleNode = SDB->getValue(ShuffleInst); + SDLoc sdl = ShuffleNode; + Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, + ShuffleNode.getValueType().getScalarType(), ShuffleNode, + DAG.getConstant(0, sdl, TLI.getVectorIdxTy())); + SDB->setValue(Ptr, Base); } - return true; } - return false; + + Index = SDB->getValue(IndexVal); + + if (SExtInst* Sext = dyn_cast(IndexVal)) { + IndexVal = Sext->getOperand(0); + if (SDB->findValue(IndexVal)) + Index = SDB->getValue(IndexVal); + } + if (!Index.getValueType().isVector()) { + unsigned VectorWidth = + cast(Gep->getType())->getVectorNumElements(); + MVT VT = MVT::getVectorVT(Index.getValueType().getSimpleVT(), VectorWidth); + SmallVector Ops(VectorWidth, Index); + Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + } + return true; } void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -2540,10 +2540,6 @@ Assert(isa(TargetTy), "GEP base pointer is not a vector or a vector of pointers", &GEP); Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP); - Assert(GEP.getPointerOperandType()->isVectorTy() == - GEP.getType()->isVectorTy(), - "Vector GEP must return a vector value", &GEP); - SmallVector Idxs(GEP.idx_begin(), GEP.idx_end()); Type *ElTy = GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); @@ -2553,17 +2549,18 @@ GEP.getResultElementType() == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); - if (GEP.getPointerOperandType()->isVectorTy()) { + if (GEP.getType()->isVectorTy()) { // Additional checks for vector GEPs. - unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements(); - Assert(GepWidth == GEP.getType()->getVectorNumElements(), - "Vector GEP result width doesn't match operand's", &GEP); + unsigned GepWidth = cast(GEP.getType())->getVectorNumElements(); + if (GEP.getPointerOperandType()->isVectorTy()) + Assert(GepWidth == GEP.getPointerOperandType()->getVectorNumElements(), + "Vector GEP result width doesn't match operand's", &GEP); for (unsigned i = 0, e = Idxs.size(); i != e; ++i) { Type *IndexTy = Idxs[i]->getType(); - Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!", - &GEP); - unsigned IndexWidth = IndexTy->getVectorNumElements(); - Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP); + if (IndexTy->isVectorTy()) { + unsigned IndexWidth = cast(IndexTy)->getVectorNumElements(); + Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP); + } } } visitInstruction(GEP); Index: test/Assembler/getelementptr_vec_idx1.ll =================================================================== --- test/Assembler/getelementptr_vec_idx1.ll +++ test/Assembler/getelementptr_vec_idx1.ll @@ -1,8 +1,8 @@ ; RUN: not llvm-as < %s >/dev/null 2> %t ; RUN: FileCheck %s < %t -; Test that a vector index is only used with a vector pointer. +; Test that a vector GEP may be used with a scalar base, the result is a vector of pointers -; CHECK: getelementptr index type missmatch +; CHECK: '%w' defined with type '<2 x i32*> define i32 @test(i32* %a) { %w = getelementptr i32, i32* %a, <2 x i32> Index: test/Assembler/getelementptr_vec_idx2.ll =================================================================== --- test/Assembler/getelementptr_vec_idx2.ll +++ test/Assembler/getelementptr_vec_idx2.ll @@ -1,10 +1,24 @@ ; RUN: not llvm-as < %s >/dev/null 2> %t ; RUN: FileCheck %s < %t -; Test that a vector pointer is only used with a vector index. +; Test that a vector pointer may be used with a scalar index. +; Test that a vector pointer and vector index should have the same vector width -; CHECK: getelementptr index type missmatch - -define <2 x i32> @test(<2 x i32*> %a) { +; This code is correct +define <2 x i32*> @test2(<2 x i32*> %a) { %w = getelementptr i32, <2 x i32*> %a, i32 2 + ret <2 x i32*> %w +} + +; This code is correct +define <2 x i32*> @test3(i32* %a) { + %w = getelementptr i32, i32* %a, <2 x i32> + ret <2 x i32*> %w +} + +; CHECK: getelementptr vector index has a wrong number of elements + +define <2 x i32> @test1(<2 x i32*> %a) { + %w = getelementptr i32, <2 x i32*> %a, <4 x i32> ret <2 x i32> %w } + Index: test/Assembler/getelementptr_vec_idx3.ll =================================================================== --- test/Assembler/getelementptr_vec_idx3.ll +++ test/Assembler/getelementptr_vec_idx3.ll @@ -1,8 +1,8 @@ ; RUN: not llvm-as < %s >/dev/null 2> %t ; RUN: FileCheck %s < %t -; Test that vector indices have the same number of elements as the pointer. +; Test that a vector GEP may be used with a scalar base, the result is a vector of pointers -; CHECK: getelementptr index type missmatch +; CHECK: '%w' defined with type '<2 x <4 x i32>*>' define <4 x i32> @test(<4 x i32>* %a) { %w = getelementptr <4 x i32>, <4 x i32>* %a, <2 x i32> Index: test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- test/CodeGen/X86/masked_gather_scatter.ll +++ test/CodeGen/X86/masked_gather_scatter.ll @@ -140,3 +140,78 @@ %res = add <16 x i32> %gt1, %gt2 ret <16 x i32> %res } + +%struct.RT = type { i8, [10 x [20 x i32]], i8 } +%struct.ST = type { i32, double, %struct.RT } + +; Test9 and Test10 should give the same result + +; KNL-LABEL: test9 +; KNL: vpbroadcastq %rdi, %zmm +; KNL: vpmovsxdq +; KNL: vpbroadcastq +; KNL: vpmuludq +; KNL: vpaddq +; KNL: vpaddq +; KNL: vpaddq +; KNL: vpaddq +; KNL: vpgatherqd (,%zmm + +define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) { +entry: + %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0 + %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer + + %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> , <8 x i32>, <8 x i32> %ind5, <8 x i64> + %res = call <8 x i32 > @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> , <8 x i32> undef) + ret <8 x i32> %res +} + +; KNL-LABEL: test10 +; KNL: vpbroadcastq %rdi, %zmm +; KNL: vpmovsxdq +; KNL: vpbroadcastq +; KNL: vpmuludq +; KNL: vpaddq +; KNL: vpaddq +; KNL: vpaddq +; KNL: vpaddq +; KNL: vpgatherqd (,%zmm +define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) { +entry: + %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0 + %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer + + %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13 + %res = call <8 x i32 > @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> , <8 x i32> undef) + ret <8 x i32> %res +} + +; KNL-LABEL: test11 +; KNL: vpbroadcastd %esi, %zmm +; KNL: vgatherdps (%rdi,%zmm + +define <16 x float> @test11(float* %base, i32 %ind) { + + %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 + %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer + + %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind + + %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +; KNL-LABEL: test12 +; KNL: vgatherdps (%rdi,%zmm +define <16 x float> @test12(float* %base, <16 x i32> %ind) { + + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + + + Index: test/CodeGen/X86/vector-gep.ll =================================================================== --- test/CodeGen/X86/vector-gep.ll +++ test/CodeGen/X86/vector-gep.ll @@ -92,3 +92,36 @@ ;CHECK: ret } +;CHECK-LABEL: AGEP7: +define <4 x i8*> @AGEP7(<4 x i8*> %param, i32 %off) nounwind { +entry: +;CHECK-LABEL: AGEP7 +;CHECK-NOT: pslld + %A = getelementptr i8, <4 x i8*> %param, i32 %off + ret <4 x i8*> %A +;CHECK: ret +} + +;CHECK-LABEL: AGEP8: +define <4 x i8*> @AGEP8(<4 x i8*> %param, i8 %off) nounwind { +entry: +;CHECK-LABEL: AGEP8 +;CHECK: vpaddd + %A = getelementptr i8, <4 x i8*> %param, i8 %off + ret <4 x i8*> %A +;CHECK: ret +} + +;CHECK-LABEL: AGEP9: +define <4 x i16*> @AGEP9(i16* %param, <4 x i32> %off) nounwind { +entry: +;CHECK-LABEL: AGEP9 +; Multiply offset by two (add it to itself). +;CHECK: vpadd +; add the base to the offset +;CHECK: vbroadcastss +;CHECK-NEXT: vpadd + %A = getelementptr i16, i16* %param, <4 x i32> %off + ret <4 x i16*> %A +;CHECK: ret +}