Diff 258215

llvm/lib/CodeGen/CodeGenPrepare.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 362 Lines • ▼ Show 20 Lines	private:
bool canMergeBlocks(const BasicBlock BB, const BasicBlock DestBB) const;		bool canMergeBlocks(const BasicBlock BB, const BasicBlock DestBB) const;
void eliminateMostlyEmptyBlock(BasicBlock *BB);		void eliminateMostlyEmptyBlock(BasicBlock *BB);
bool isMergingEmptyBlockProfitable(BasicBlock BB, BasicBlock DestBB,		bool isMergingEmptyBlockProfitable(BasicBlock BB, BasicBlock DestBB,
bool isPreheader);		bool isPreheader);
bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);		bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
bool optimizeInst(Instruction *I, bool &ModifiedDT);		bool optimizeInst(Instruction *I, bool &ModifiedDT);
bool optimizeMemoryInst(Instruction MemoryInst, Value Addr,		bool optimizeMemoryInst(Instruction MemoryInst, Value Addr,
Type *AccessTy, unsigned AddrSpace);		Type *AccessTy, unsigned AddrSpace);
		bool optimizeGatherScatterInst(Instruction MemoryInst, Value Ptr);
bool optimizeInlineAsmInst(CallInst *CS);		bool optimizeInlineAsmInst(CallInst *CS);
bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);		bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
bool optimizeExt(Instruction *&I);		bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);		bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *Load);		bool optimizeLoadExt(LoadInst *Load);
bool optimizeShiftInst(BinaryOperator *BO);		bool optimizeShiftInst(BinaryOperator *BO);
bool optimizeSelectInst(SelectInst *SI);		bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);		bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
▲ Show 20 Lines • Show All 1,657 Lines • ▼ Show 20 Lines	case Intrinsic::vscale: {
auto Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());		auto Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
auto One = ConstantInt::getSigned(II->getType(), 1);		auto One = ConstantInt::getSigned(II->getType(), 1);
auto *CGep =		auto *CGep =
ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);		ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);
II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType()));		II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType()));
II->eraseFromParent();		II->eraseFromParent();
return true;		return true;
}		}
		break;
}		}
		case Intrinsic::masked_gather:
		return optimizeGatherScatterInst(II, II->getArgOperand(0));
		case Intrinsic::masked_scatter:
		return optimizeGatherScatterInst(II, II->getArgOperand(1));
}		}

SmallVector<Value *, 2> PtrOps;		SmallVector<Value *, 2> PtrOps;
Type *AccessTy;		Type *AccessTy;
if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))		if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
while (!PtrOps.empty()) {		while (!PtrOps.empty()) {
Value *PtrVal = PtrOps.pop_back_val();		Value *PtrVal = PtrOps.pop_back_val();
unsigned AS = PtrVal->getType()->getPointerAddressSpace();		unsigned AS = PtrVal->getType()->getPointerAddressSpace();
▲ Show 20 Lines • Show All 3,124 Lines • ▼ Show 20 Lines	if (IterHandle != CurValue) {
CurInstIterator = BB->begin();		CurInstIterator = BB->begin();
SunkAddrs.clear();		SunkAddrs.clear();
}		}
}		}
++NumMemoryInsts;		++NumMemoryInsts;
return true;		return true;
}		}

		/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
		/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
		spatelUnsubmitted Not Done Reply Inline Actions I haven't looked at this before, so I'm not the best reviewer... But it would be helpful to any first-time reader if this function had a documentation comment to describe the transform. An IR example in the comment and IR regression tests would also be educational. spatel: I haven't looked at this before, so I'm not the best reviewer... But it would be helpful to any…
		/// only handle a 2 operand GEP in the same basic block or a splat constant
		/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
		/// index.
		///
		/// If the existing GEP has a vector base pointer that is splat, we can look
		/// through the splat to find the scalar pointer. If we can't find a scalar
		/// pointer there's nothing we can do.
		///
		/// If we have a GEP with more than 2 indices where the middle indices are all
		/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
		///
		/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
		/// followed by a GEP with an all zeroes vector index. This will enable
		/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
		/// zero index.
		bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
		                                               Value *Ptr) {
		  // MemoryInst is the masked gather/scatter call and Ptr is its vector pointer
		  // operand. Returns true iff Ptr's use in MemoryInst was replaced by newly
		  // built GEPs; returns false (leaving the IR untouched) otherwise.
		  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
		  if (!GEP || !GEP->hasIndices())
		    return false;

		  // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
		  // FIXME: We should support this by sinking the GEP.
		  if (MemoryInst->getParent() != GEP->getParent())
		    return false;

		  // Work on a mutable copy of the GEP operands: Ops[0] is the pointer operand
		  // and Ops[1..] are the indices.
		  SmallVector<Value *, 2> Ops(GEP->op_begin(), GEP->op_end());

		  bool RewriteGEP = false;

		  // A vector base pointer is only usable when it is a splat: replace it with
		  // the scalar being splatted, or give up if there is none.
		  if (Ops[0]->getType()->isVectorTy()) {
		    Ops[0] = const_cast<Value *>(getSplatValue(Ops[0]));
		    if (!Ops[0])
		      return false;
		    RewriteGEP = true;
		  }

		  unsigned FinalIndex = Ops.size() - 1;

		efriedmaUnsubmitted Not Done Reply Inline Actions GTI is unused? efriedma: GTI is unused?
		  // Ensure all but the last index is 0.
		  // FIXME: This isn't strictly required. All that's required is that they are
		  // all scalars or splats.
		  for (unsigned i = 1; i < FinalIndex; ++i) {
		    auto *C = dyn_cast<Constant>(Ops[i]);
		    if (!C)
		      return false;
		    if (isa<VectorType>(C->getType()))
		      C = C->getSplatValue();
		efriedmaUnsubmitted Not Done Reply Inline Actions Does it matter whether the value is specifically zero, as opposed to an arbitrary splat value? efriedma: Does it matter whether the value is specifically zero, as opposed to an arbitrary splat value?
		craig.topperAuthorUnsubmitted Done Reply Inline Actions It shouldn't matter, but I was just trying to avoid changing too much relative to the existing SelectionDAGBuilder code in this patch. craig.topper: It shouldn't matter, but I was just trying to avoid changing too much relative to the existing…
		    // C is null here when a vector constant was not a splat.
		    auto *CI = dyn_cast_or_null<ConstantInt>(C);
		    if (!CI || !CI->isZero())
		      return false;
		    // Scalarize the index if needed.
		    Ops[i] = CI;
		  }

		  // Try to scalarize the final index.
		  if (Ops[FinalIndex]->getType()->isVectorTy()) {
		    if (Value *V = const_cast<Value *>(getSplatValue(Ops[FinalIndex]))) {
		      auto *C = dyn_cast<ConstantInt>(V);
		      // Don't scalarize all zeros vector.
		      if (!C || !C->isZero()) {
		        Ops[FinalIndex] = V;
		        RewriteGEP = true;
		efriedmaUnsubmitted Not Done Reply Inline Actions I think it would make sense to unify the handling where the last index is a scalar/splat, rather that splitting it based on whether `GTI.isStruct()` is true. efriedma: I think it would make sense to unify the handling where the last index is a scalar/splat…
		craig.topperAuthorUnsubmitted Done Reply Inline Actions Good point. I added the scalar/splat case later after facing some regressions. craig.topper: Good point. I added the scalar/splat case later after facing some regressions.
		      }
		    }
		  }

		  // If we made any changes or we have extra operands, we need to generate
		  // new instructions.
		  if (!RewriteGEP && Ops.size() == 2)
		    return false;

		  // The replacement address must keep the element count of the original
		  // vector of pointers.
		  unsigned NumElts = Ptr->getType()->getVectorNumElements();

		  // The new GEPs are created with a builder anchored at the gather/scatter.
		  IRBuilder<> Builder(MemoryInst);

		  // Scalar index type that the data layout reports for the (scalar) base
		  // pointer type; used for the zero indices created below.
		  Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());

		  Value *NewAddr;

		  // If the final index isn't a vector, emit a scalar GEP containing all ops
		  // and a vector GEP with all zeroes final index.
		  if (!Ops[FinalIndex]->getType()->isVectorTy()) {
		    NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front());
		    Type *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
		    NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy));
		  } else {
		    // Capture the vector index before Ops[FinalIndex] is overwritten below.
		    Value *Base = Ops[0];
		    Value *Index = Ops[FinalIndex];

		    // Create a scalar GEP if there are more than 2 operands.
		    if (Ops.size() != 2) {
		      // Replace the last index with 0.
		      Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
		      Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front());
		    }

		    // Now create the GEP with scalar pointer and vector index.
		    NewAddr = Builder.CreateGEP(Base, Index);
		  }

		  // Only this gather/scatter is redirected; other users of Ptr keep the
		  // original GEP.
		  MemoryInst->replaceUsesOfWith(Ptr, NewAddr);

		  // If we have no uses, recursively delete the value and all dead instructions
		  // using it.
		  if (Ptr->use_empty())
		    RecursivelyDeleteTriviallyDeadInstructions(Ptr, TLInfo);

		  return true;
		}

/// If there are any memory operands, use OptimizeMemoryInst to sink their		/// If there are any memory operands, use OptimizeMemoryInst to sink their
		spatelUnsubmitted Not Done Reply Inline Actions Can this copied chunk be lifted to a helper function as a preliminary commit? spatel: Can this copied chunk be lifted to a helper function as a preliminary commit?
/// address computing into the block when possible / profitable.		/// address computing into the block when possible / profitable.
bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {		bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;		bool MadeChange = false;

const TargetRegisterInfo *TRI =		const TargetRegisterInfo *TRI =
TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();		TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints =		TargetLowering::AsmOperandInfoVector TargetConstraints =
TLI->ParseConstraints(DL, TRI, CS);		TLI->ParseConstraints(DL, TRI, CS);
▲ Show 20 Lines • Show All 2,375 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

Show First 20 Lines • Show All 512 Lines • ▼ Show 20 Lines	public:
bool handleDebugValue(const Value V, DILocalVariable Var,		bool handleDebugValue(const Value V, DILocalVariable Var,
DIExpression *Expr, DebugLoc CurDL,		DIExpression *Expr, DebugLoc CurDL,
DebugLoc InstDL, unsigned Order);		DebugLoc InstDL, unsigned Order);

/// Evict any dangling debug information, attempting to salvage it first.		/// Evict any dangling debug information, attempting to salvage it first.
void resolveOrClearDbgInfo();		void resolveOrClearDbgInfo();

SDValue getValue(const Value *V);		SDValue getValue(const Value *V);
bool findValue(const Value *V) const;

/// Return the SDNode for the specified IR value if it exists.		/// Return the SDNode for the specified IR value if it exists.
SDNode getNodeForIRValue(const Value V) {		SDNode getNodeForIRValue(const Value V) {
if (NodeMap.find(V) == NodeMap.end())		if (NodeMap.find(V) == NodeMap.end())
return nullptr;		return nullptr;
return NodeMap[V].getNode();		return NodeMap[V].getNode();
}		}

▲ Show 20 Lines • Show All 383 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,429 Lines • ▼ Show 20 Lines	SDValue SelectionDAGBuilder::getValue(const Value *V) {

// Otherwise create a new SDValue and remember it.		// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);		SDValue Val = getValueImpl(V);
NodeMap[V] = Val;		NodeMap[V] = Val;
resolveDanglingDebugInfo(V, Val);		resolveDanglingDebugInfo(V, Val);
return Val;		return Val;
}		}

// Return true if an SDValue exists for the given Value, i.e. \p V has an
// entry in either NodeMap or FuncInfo.ValueMap. Performs lookups only; neither
// map is modified.
bool SelectionDAGBuilder::findValue(const Value *V) const {
  return (NodeMap.find(V) != NodeMap.end()) ||
         (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
}

/// getNonRegisterValue - Return an SDValue for the given Value, but		/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.		/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {		SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
// If we already have an SDValue for this value, use it.		// If we already have an SDValue for this value, use it.
SDValue &N = NodeMap[V];		SDValue &N = NodeMap[V];
if (N.getNode()) {		if (N.getNode()) {
if (isa<ConstantSDNode>(N) \|\| isa<ConstantFPSDNode>(N)) {		if (isa<ConstantSDNode>(N) \|\| isa<ConstantFPSDNode>(N)) {
// Remove the debug location from the node as the node is about to be used		// Remove the debug location from the node as the node is about to be used
▲ Show 20 Lines • Show All 2,797 Lines • ▼ Show 20 Lines
// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..		// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
//		//
// When the first GEP operand is a single pointer - it is the uniform base we		// When the first GEP operand is a single pointer - it is the uniform base we
// are looking for. If first operand of the GEP is a splat vector - we		// are looking for. If first operand of the GEP is a splat vector - we
// extract the splat value and use it as a uniform base.		// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.		// In all other cases the function returns 'false'.
static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,		static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
ISD::MemIndexType &IndexType, SDValue &Scale,		ISD::MemIndexType &IndexType, SDValue &Scale,
SelectionDAGBuilder *SDB) {		SelectionDAGBuilder SDB, const BasicBlock CurBB) {
SelectionDAG& DAG = SDB->DAG;		SelectionDAG& DAG = SDB->DAG;
LLVMContext &Context = *DAG.getContext();		const TargetLowering &TLI = DAG.getTargetLoweringInfo();
		const DataLayout &DL = DAG.getDataLayout();

assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");		assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP)
return false;

const Value *BasePtr = GEP->getPointerOperand();		// Handle splat constant pointer.
if (BasePtr->getType()->isVectorTy()) {		if (auto *C = dyn_cast<Constant>(Ptr)) {
BasePtr = getSplatValue(BasePtr);
if (!BasePtr)
return false;
}

unsigned FinalIndex = GEP->getNumOperands() - 1;
Value *IndexVal = GEP->getOperand(FinalIndex);
gep_type_iterator GTI = gep_type_begin(*GEP);

// Ensure all the other indices are 0.
for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) {
auto *C = dyn_cast<Constant>(GEP->getOperand(i));
if (!C)
return false;
if (isa<VectorType>(C->getType()))
C = C->getSplatValue();		C = C->getSplatValue();
auto *CI = dyn_cast_or_null<ConstantInt>(C);		if (!C)
if (!CI \|\| !CI->isZero())
return false;		return false;

		Base = SDB->getValue(C);

		unsigned NumElts = Ptr->getType()->getVectorNumElements();
		EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
		Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
		IndexType = ISD::SIGNED_SCALED;
		Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
		return true;
}		}

// The operands of the GEP may be defined in another basic block.		const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
// In this case we'll not find nodes for the operands.		if (!GEP \|\| GEP->getParent() != CurBB)
if (!SDB->findValue(BasePtr))
return false;		return false;
Constant *C = dyn_cast<Constant>(IndexVal);
if (!C && !SDB->findValue(IndexVal))		if (GEP->getNumOperands() != 2)
return false;		return false;

const TargetLowering &TLI = DAG.getTargetLoweringInfo();		const Value *BasePtr = GEP->getPointerOperand();
const DataLayout &DL = DAG.getDataLayout();		const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1);
StructType *STy = GTI.getStructTypeOrNull();

if (STy) {		// Make sure the base is scalar and the index is a vector.
const StructLayout *SL = DL.getStructLayout(STy);		if (BasePtr->getType()->isVectorTy() \|\| !IndexVal->getType()->isVectorTy())
unsigned Field = cast<Constant>(IndexVal)->getUniqueInteger().getZExtValue();		return false;
Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
Index = DAG.getConstant(SL->getElementOffset(Field),		Base = SDB->getValue(BasePtr);
SDB->getCurSDLoc(), TLI.getPointerTy(DL));		Index = SDB->getValue(IndexVal);
} else {		IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(		Scale = DAG.getTargetConstant(
DL.getTypeAllocSize(GEP->getResultElementType()),		DL.getTypeAllocSize(GEP->getResultElementType()),
SDB->getCurSDLoc(), TLI.getPointerTy(DL));		SDB->getCurSDLoc(), TLI.getPointerTy(DL));
Index = SDB->getValue(IndexVal);
}
Base = SDB->getValue(BasePtr);
IndexType = ISD::SIGNED_SCALED;

if (STy \|\| !Index.getValueType().isVector()) {
unsigned GEPWidth = cast<VectorType>(GEP->getType())->getNumElements();
EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
}
return true;		return true;
}		}

void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {		void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();		SDLoc sdl = getCurSDLoc();

// llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)		// llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
const Value *Ptr = I.getArgOperand(1);		const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));		SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));		SDValue Mask = getValue(I.getArgOperand(3));
EVT VT = Src0.getValueType();		EVT VT = Src0.getValueType();
MaybeAlign Alignment(cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());		MaybeAlign Alignment(cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
if (!Alignment)		if (!Alignment)
Alignment = DAG.getEVTAlign(VT);		Alignment = DAG.getEVTAlign(VT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();		const TargetLowering &TLI = DAG.getTargetLoweringInfo();

AAMDNodes AAInfo;		AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);		I.getAAMetadata(AAInfo);

SDValue Base;		SDValue Base;
SDValue Index;		SDValue Index;
ISD::MemIndexType IndexType;		ISD::MemIndexType IndexType;
SDValue Scale;		SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this);		bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
		I.getParent());

unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();		unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(		MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOStore,		MachinePointerInfo(AS), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable		// TODO: Make MachineMemOperands aware of scalable
// vectors.		// vectors.
MemoryLocation::UnknownSize, *Alignment, AAInfo);		MemoryLocation::UnknownSize, *Alignment, AAInfo);
if (!UniformBase) {		if (!UniformBase) {
▲ Show 20 Lines • Show All 94 Lines • ▼ Show 20 Lines	void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
I.getAAMetadata(AAInfo);		I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);		const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

SDValue Root = DAG.getRoot();		SDValue Root = DAG.getRoot();
SDValue Base;		SDValue Base;
SDValue Index;		SDValue Index;
ISD::MemIndexType IndexType;		ISD::MemIndexType IndexType;
SDValue Scale;		SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this);		bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
		I.getParent());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();		unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(		MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,		MachinePointerInfo(AS), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable		// TODO: Make MachineMemOperands aware of scalable
// vectors.		// vectors.
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);		MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);

if (!UniformBase) {		if (!UniformBase) {
▲ Show 20 Lines • Show All 6,122 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/masked_gather.ll

	Show First 20 Lines • Show All 1,715 Lines • ▼ Show 20 Lines
	; AVX2-NEXT: jmp .LBB4_46			; AVX2-NEXT: jmp .LBB4_46
	;			;
	; AVX512-LABEL: gather_v8i32_v8i32:			; AVX512-LABEL: gather_v8i32_v8i32:
	; AVX512: # %bb.0:			; AVX512: # %bb.0:
	; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0			; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
	; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0			; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
	; AVX512-NEXT: kshiftlw $8, %k0, %k0			; AVX512-NEXT: kshiftlw $8, %k0, %k0
	; AVX512-NEXT: kshiftrw $8, %k0, %k1			; AVX512-NEXT: kshiftrw $8, %k0, %k1
	; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]			; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0
	; AVX512-NEXT: kmovw %k1, %k2			; AVX512-NEXT: kmovw %k1, %k2
	; AVX512-NEXT: vpgatherdd c(,%zmm0,4), %zmm1 {%k2}			; AVX512-NEXT: vpgatherdd c+12(,%zmm0), %zmm1 {%k2}
	; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 = [28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28]			; AVX512-NEXT: vpgatherdd c+28(,%zmm0), %zmm2 {%k1}
	; AVX512-NEXT: vpgatherdd c(,%zmm0), %zmm2 {%k1}
	; AVX512-NEXT: vpaddd %ymm2, %ymm2, %ymm0			; AVX512-NEXT: vpaddd %ymm2, %ymm2, %ymm0
	; AVX512-NEXT: vpaddd %ymm0, %ymm1, %ymm0			; AVX512-NEXT: vpaddd %ymm0, %ymm1, %ymm0
	; AVX512-NEXT: retq			; AVX512-NEXT: retq
	%1 = insertelement <8 x %struct.a> undef, %struct.a @c, i32 0			%1 = insertelement <8 x %struct.a> undef, %struct.a @c, i32 0
	%2 = shufflevector <8 x %struct.a> %1, <8 x %struct.a> undef, <8 x i32> zeroinitializer			%2 = shufflevector <8 x %struct.a> %1, <8 x %struct.a> undef, <8 x i32> zeroinitializer
	%3 = getelementptr %struct.a, <8 x %struct.a*> %2, <8 x i32> zeroinitializer, i32 0, i32 3			%3 = getelementptr %struct.a, <8 x %struct.a*> %2, <8 x i32> zeroinitializer, i32 0, i32 3
	%4 = icmp eq <8 x i32> %trigger, zeroinitializer			%4 = icmp eq <8 x i32> %trigger, zeroinitializer
	%5 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %3, i32 4, <8 x i1> %4, <8 x i32> undef)			%5 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %3, i32 4, <8 x i1> %4, <8 x i32> undef)
	Show All 18 Lines

llvm/test/CodeGen/X86/masked_gather_scatter.ll

Show First 20 Lines • Show All 632 Lines • ▼ Show 20 Lines	entry:
%res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)		%res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
ret <8 x i32> %res		ret <8 x i32> %res
}		}

; Splat index in GEP, requires broadcast		; Splat index in GEP, requires broadcast
define <16 x float> @test11(float* %base, i32 %ind) {		define <16 x float> @test11(float* %base, i32 %ind) {
; KNL_64-LABEL: test11:		; KNL_64-LABEL: test11:
; KNL_64: # %bb.0:		; KNL_64: # %bb.0:
; KNL_64-NEXT: vpbroadcastd %esi, %zmm1		; KNL_64-NEXT: movslq %esi, %rax
		; KNL_64-NEXT: leaq (%rdi,%rax,4), %rax
		; KNL_64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL_64-NEXT: kxnorw %k0, %k0, %k1		; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}		; KNL_64-NEXT: vgatherdps (%rax,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT: retq		; KNL_64-NEXT: retq
;		;
; KNL_32-LABEL: test11:		; KNL_32-LABEL: test11:
; KNL_32: # %bb.0:		; KNL_32: # %bb.0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax		; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1		; KNL_32-NEXT: shll $2, %eax
		; KNL_32-NEXT: addl {{[0-9]+}}(%esp), %eax
		; KNL_32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL_32-NEXT: kxnorw %k0, %k0, %k1		; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}		; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT: retl		; KNL_32-NEXT: retl
;		;
; SKX-LABEL: test11:		; SKX-LABEL: test11:
; SKX: # %bb.0:		; SKX: # %bb.0:
; SKX-NEXT: vpbroadcastd %esi, %zmm1		; SKX-NEXT: movslq %esi, %rax
		; SKX-NEXT: leaq (%rdi,%rax,4), %rax
		; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; SKX-NEXT: kxnorw %k0, %k0, %k1		; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}		; SKX-NEXT: vgatherdps (%rax,%zmm1,4), %zmm0 {%k1}
		efriedmaUnsubmitted Not Done Reply Inline Actions Orthogonal, but probably you could add a special-case here: vgather with an all-zero vector is equivalent to vbroadcast, I think? efriedma: Orthogonal, but probably you could add a special-case here: vgather with an all-zero vector is…
		craig.topperAuthorUnsubmitted Done Reply Inline Actions Yep it is. Need to look at whether we need to do that in DAG combine with a new X86ISD opcode to carry the mask or if we can just pattern match it in isel. craig.topper: Yep it is. Need to look at whether we need to do that in DAG combine with a new X86ISD opcode…
; SKX-NEXT: retq		; SKX-NEXT: retq
;		;
; SKX_32-LABEL: test11:		; SKX_32-LABEL: test11:
; SKX_32: # %bb.0:		; SKX_32: # %bb.0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax		; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1		; SKX_32-NEXT: shll $2, %eax
		; SKX_32-NEXT: addl {{[0-9]+}}(%esp), %eax
		; SKX_32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1		; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}		; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT: retl		; SKX_32-NEXT: retl

%broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0		%broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0
%broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer		%broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer

%gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind		%gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
▲ Show 20 Lines • Show All 2,641 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/pr45067.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skylake \| FileCheck %s			; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skylake \| FileCheck %s

	@global = external global i32, align 4			@global = external global i32, align 4

	define void @foo(<8 x i32>* %x, <8 x i1> %y) {			define void @foo(<8 x i32>* %x, <8 x i1> %y) {
	; CHECK-LABEL: foo:			; CHECK-LABEL: foo:
	; CHECK: ## %bb.0:			; CHECK: ## %bb.0:
	; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1			; CHECK-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
	; CHECK-NEXT: vpbroadcastq _global@{{.*}}(%rip), %ymm2			; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
	; CHECK-NEXT: vpgatherqd %xmm1, (,%ymm2), %xmm3			; CHECK-NEXT: movq _global@{{.*}}(%rip), %rax
				; CHECK-NEXT: vpgatherdd %ymm1, (%rax,%ymm2), %ymm3
	; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero			; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
	; CHECK-NEXT: vpslld $31, %ymm0, %ymm0			; CHECK-NEXT: vpslld $31, %ymm0, %ymm0
	; CHECK-NEXT: vinserti128 $1, %xmm3, %ymm3, %ymm1			; CHECK-NEXT: vpmaskmovd %ymm3, %ymm0, (%rdi)
	; CHECK-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi)
	; CHECK-NEXT: ud2			; CHECK-NEXT: ud2
	%tmp = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32> <i32 @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global>, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)			%tmp = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32> <i32 @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global>, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
	call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %tmp, <8 x i32>* %x, i32 4, <8 x i1> %y)			call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %tmp, <8 x i32>* %x, i32 4, <8 x i1> %y)
	unreachable			unreachable
	}			}

	declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)			declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)
	declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)			declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)

llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
				; RUN: opt -S -codegenprepare < %s \| FileCheck %s

				target datalayout =
				"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
				target triple = "x86_64-unknown-linux-gnu"

				%struct.a = type { i32, i32 }
				@c = external dso_local global %struct.a, align 4
				@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16

				define <4 x i32> @splat_base(i32* %base, <4 x i64> %index) {
				; CHECK-LABEL: @splat_base(
				; CHECK-NEXT: [[TMP1:%.]] = getelementptr i32, i32 [[BASE:%.]], <4 x i64> [[INDEX:%.]]
				; CHECK-NEXT: [[RES:%.]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				; CHECK-NEXT: ret <4 x i32> [[RES]]
				;
				%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %base, i32 0
				%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
				%gep = getelementptr i32, <4 x i32*> %broadcast.splat, <4 x i64> %index
				%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				ret <4 x i32> %res
				}

				define <4 x i32> @splat_struct(%struct.a* %base) {
				; CHECK-LABEL: @splat_struct(
				; CHECK-NEXT: [[TMP1:%.]] = getelementptr [[STRUCT_A:%.]], %struct.a* [[BASE:%.*]], i64 0, i32 1
				; CHECK-NEXT: [[TMP2:%.]] = getelementptr i32, i32 [[TMP1]], <4 x i64> zeroinitializer
				; CHECK-NEXT: [[RES:%.]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				; CHECK-NEXT: ret <4 x i32> [[RES]]
				;
				%gep = getelementptr %struct.a, %struct.a* %base, <4 x i64> zeroinitializer, i32 1
				%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				ret <4 x i32> %res
				}

				; Gather through a GEP whose base is a splatted scalar pointer and whose index
				; is a scalar. The expected output does the address arithmetic as a scalar GEP
				; and then applies a vector GEP with a zeroinitializer index.
				define <4 x i32> @scalar_index(i32* %base, i64 %index) {
				; CHECK-LABEL: @scalar_index(
				; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
				; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
				; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				; CHECK-NEXT: ret <4 x i32> [[RES]]
				;
				; Splat %base across all four lanes (insert into lane 0, then broadcast).
				%broadcast.splatinsert = insertelement <4 x i32*> undef, i32* %base, i32 0
				%broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer
				%gep = getelementptr i32, <4 x i32*> %broadcast.splat, i64 %index
				%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				ret <4 x i32> %res
				}

				; Gather through a GEP with a scalar base pointer and an index that is a
				; scalar splatted into a vector. The expected output uses the scalar index in
				; a scalar GEP, then applies a vector GEP with a zeroinitializer index.
				define <4 x i32> @splat_index(i32* %base, i64 %index) {
				; CHECK-LABEL: @splat_index(
				; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
				; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
				; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				; CHECK-NEXT: ret <4 x i32> [[RES]]
				;
				; Splat %index across all four lanes (insert into lane 0, then broadcast).
				%broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0
				%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
				%gep = getelementptr i32, i32* %base, <4 x i64> %broadcast.splat
				%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				ret <4 x i32> %res
				}

				; Gather from the constant global array @glob_array using a vector as the last
				; GEP index. The expected output uses a constant-expression GEP to element 0 of
				; the array as the scalar base and keeps %indxs as the vector index.
				define <4 x i32> @test_global_array(<4 x i64> %indxs) {
				; CHECK-LABEL: @test_global_array(
				; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @glob_array, i64 0, i64 0), <4 x i64> [[INDXS:%.*]]
				; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				; CHECK-NEXT: ret <4 x i32> [[G]]
				;
				%p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <4 x i64> %indxs
				%g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				ret <4 x i32> %g
				}

				; Gather through a GEP whose base is the address of global @c splatted into a
				; vector, with constant indices selecting field 1. The expected output passes
				; the gather a constant vector of four identical constant-expression GEPs, so
				; no separate GEP instruction remains.
				define <4 x i32> @global_struct_splat() {
				; CHECK-LABEL: @global_struct_splat(
				; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> <i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1)>, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				; CHECK-NEXT: ret <4 x i32> [[TMP1]]
				;
				; Splat the address of @c across all four lanes.
				%1 = insertelement <4 x %struct.a*> undef, %struct.a* @c, i32 0
				%2 = shufflevector <4 x %struct.a*> %1, <4 x %struct.a*> undef, <4 x i32> zeroinitializer
				%3 = getelementptr %struct.a, <4 x %struct.a*> %2, <4 x i64> zeroinitializer, i32 1
				%4 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
				ret <4 x i32> %4
				}

				; Declaration of the masked-gather intrinsic called by the test functions above.
				declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)

This is an archive of the discontinued LLVM Phabricator instance.

[SelectionDAGBuilder][CGP][X86] Move some of SDB's gather/scatter uniform base handling to CGP.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 258215

llvm/lib/CodeGen/CodeGenPrepare.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

llvm/test/CodeGen/X86/masked_gather.ll

llvm/test/CodeGen/X86/masked_gather_scatter.ll

llvm/test/CodeGen/X86/pr45067.ll

llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SelectionDAGBuilder][CGP][X86] Move some of SDB's gather/scatter uniform base handling to CGP.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 258215

llvm/lib/CodeGen/CodeGenPrepare.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

llvm/test/CodeGen/X86/masked_gather.ll

llvm/test/CodeGen/X86/masked_gather_scatter.ll

llvm/test/CodeGen/X86/pr45067.ll

llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll

[SelectionDAGBuilder][CGP][X86] Move some of SDB's gather/scatter uniform base handling to CGP.
ClosedPublic