Diff 89096

lib/Target/ARM/ARMISelDAGToDAG.cpp

	//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//			//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
	//			//
	// The LLVM Compiler Infrastructure			// The LLVM Compiler Infrastructure
	//			//
	// This file is distributed under the University of Illinois Open Source			// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.			// License. See LICENSE.TXT for details.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// This file defines an instruction selector for the ARM target.			// This file defines an instruction selector for the ARM target.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "ARM.h"			#include "ARM.h"
	#include "ARMBaseInstrInfo.h"			#include "ARMBaseInstrInfo.h"
	#include "ARMTargetMachine.h"			#include "ARMTargetMachine.h"
				#include "ARMPatternHelpers.h"
	#include "MCTargetDesc/ARMAddressingModes.h"			#include "MCTargetDesc/ARMAddressingModes.h"
	#include "llvm/ADT/StringSwitch.h"			#include "llvm/ADT/StringSwitch.h"
	#include "llvm/CodeGen/MachineFrameInfo.h"			#include "llvm/CodeGen/MachineFrameInfo.h"
	#include "llvm/CodeGen/MachineFunction.h"			#include "llvm/CodeGen/MachineFunction.h"
	#include "llvm/CodeGen/MachineInstrBuilder.h"			#include "llvm/CodeGen/MachineInstrBuilder.h"
	#include "llvm/CodeGen/MachineRegisterInfo.h"			#include "llvm/CodeGen/MachineRegisterInfo.h"
	#include "llvm/CodeGen/SelectionDAG.h"			#include "llvm/CodeGen/SelectionDAG.h"
	#include "llvm/CodeGen/SelectionDAGISel.h"			#include "llvm/CodeGen/SelectionDAGISel.h"
	▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines
	bool tryReadRegister(SDNode *N);			bool tryReadRegister(SDNode *N);
	bool tryWriteRegister(SDNode *N);			bool tryWriteRegister(SDNode *N);

	bool tryInlineAsm(SDNode *N);			bool tryInlineAsm(SDNode *N);

	void SelectConcatVector(SDNode *N);			void SelectConcatVector(SDNode *N);
	void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);			void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

				/// Try to match SignExt as the 16-bit operand of a SM{UL,LA}W{B,T}
				/// instruction. On success, *Opc is set to the SMLAW* opcode when
				/// Accumulate is true (SMULW* otherwise), Src1 is set to the value that
				/// supplies the halfword, and true is returned.
				bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
				bool Accumulate);
				/// Try to match OR as (or (srl X, 16), (shl X, 16)) where both shifts are
				/// fed by the same SMUL_LOHI node.
				/// NOTE(review): Opc/Src0/Src1 presumably follow the same out-parameter
				/// convention as SearchSignedMulShort - confirm against the definition.
				bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
				SDValue &Src1, bool Accumulate);
	bool trySMLAWSMULW(SDNode *N);			bool trySMLAWSMULW(SDNode *N);

	void SelectCMP_SWAP(SDNode *N);			void SelectCMP_SWAP(SDNode *N);

	/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for			/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
	/// inline asm expressions.			/// inline asm expressions.
	bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,			bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
	std::vector<SDValue> &OutOps) override;			std::vector<SDValue> &OutOps) override;
	▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines
	unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;			unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
	CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);			CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
	return true;			return true;
	}			}

	return false;			return false;
	}			}

	static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,			bool ARMDAGToDAGISel::SearchSignedMulShort(SDValue SignExt, unsigned *Opc,
	bool Accumulate) {			SDValue &Src1, bool Accumulate) {
	// For SM*WB, we need to some form of sext.			// For SM*WB, we need to some form of sext.
	// For SM*WT, we need to search for (sra X, 16)			// For SM*WT, we need to search for (sra X, 16)
	// Src1 then gets set to X.			// Src1 then gets set to X.
	if ((SignExt.getOpcode() == ISD::SIGN_EXTEND \|\|			if (isBottomS16(SignExt, CurDAG)) {
	SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG \|\|
	SignExt.getOpcode() == ISD::AssertSext) &&
	SignExt.getValueType() == MVT::i32) {

	*Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;			*Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
	Src1 = SignExt.getOperand(0);			Src1 = get16BitVal(SignExt);
	return true;			return true;
	}			}

	if (SignExt.getOpcode() != ISD::SRA)			if (!isSRA16(SignExt))
	return false;

	ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
	if (!SRASrc1 \|\| SRASrc1->getZExtValue() != 16)
	return false;			return false;

	SDValue Op0 = SignExt.getOperand(0);			SDValue Op0 = SignExt.getOperand(0);

	// The sign extend operand for SM*WB could be generated by a shl and ashr.			// The sign extend operand for SM*WB could be generated by a shl and ashr.
	if (Op0.getOpcode() == ISD::SHL) {			if (isSHL16(Op0)) {
	SDValue SHL = Op0;
	ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
	if (!SHLSrc1 \|\| SHLSrc1->getZExtValue() != 16)
	return false;

	*Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;			*Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
	Src1 = Op0.getOperand(0);			Src1 = Op0.getOperand(0);
	return true;			return true;
	}			}
	*Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;			*Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
	Src1 = SignExt.getOperand(0);			Src1 = SignExt.getOperand(0);
	return true;			return true;
	}			}

	static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,			bool ARMDAGToDAGISel::SearchSignedMulLong(SDValue OR, unsigned *Opc,
	SDValue &Src1, bool Accumulate) {			SDValue &Src0, SDValue &Src1,
				bool Accumulate) {
	// First we look for:			// First we look for:
	// (add (or (srl ?, 16), (shl ?, 16)))			// (add (or (srl ?, 16), (shl ?, 16)))
	if (OR.getOpcode() != ISD::OR)			if (OR.getOpcode() != ISD::OR)
	return false;			return false;

	SDValue SRL = OR.getOperand(0);			SDValue SRL = OR.getOperand(0);
	SDValue SHL = OR.getOperand(1);			SDValue SHL = OR.getOperand(1);

	if (SRL.getOpcode() != ISD::SRL \|\| SHL.getOpcode() != ISD::SHL) {			if (SRL.getOpcode() != ISD::SRL \|\| SHL.getOpcode() != ISD::SHL) {
	SRL = OR.getOperand(1);			SRL = OR.getOperand(1);
	SHL = OR.getOperand(0);			SHL = OR.getOperand(0);
	if (SRL.getOpcode() != ISD::SRL \|\| SHL.getOpcode() != ISD::SHL)
	return false;
	}			}
				if (!isSRL16(SRL) \|\| !isSHL16(SHL))
	ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
	ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
	if (!SRLSrc1 \|\| !SHLSrc1 \|\| SRLSrc1->getZExtValue() != 16 \|\|
	SHLSrc1->getZExtValue() != 16)
	return false;			return false;

	// The first operands to the shifts need to be the two results from the			// The first operands to the shifts need to be the two results from the
	// same smul_lohi node.			// same smul_lohi node.
	if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) \|\|			if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) \|\|
	SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)			SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
	return false;			return false;

	▲ Show 20 Lines • Show All 179 Lines • ▼ Show 20 Lines
	}else{			}else{
	SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),			SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
	N->getOperand(3), getAL(CurDAG, dl),			N->getOperand(3), getAL(CurDAG, dl),
	CurDAG->getRegister(0, MVT::i32),			CurDAG->getRegister(0, MVT::i32),
	CurDAG->getRegister(0, MVT::i32) };			CurDAG->getRegister(0, MVT::i32) };
	ReplaceNode(N, CurDAG->getMachineNode(			ReplaceNode(N, CurDAG->getMachineNode(
	Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,			Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
	MVT::i32, MVT::i32, Ops));			MVT::i32, MVT::i32, Ops));
	return;			return;
	}			}
	}			}
	case ARMISD::SMLAL:{			case ARMISD::SMLAL:{
	if (Subtarget->isThumb()) {			if (Subtarget->isThumb()) {
	SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),			SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
	N->getOperand(3), getAL(CurDAG, dl),			N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl),
	CurDAG->getRegister(0, MVT::i32)};			CurDAG->getRegister(0, MVT::i32) };
	ReplaceNode(			ReplaceNode(N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32,
	N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));			MVT::i32, Ops));
	return;			return;
	}else{			} else {
				unsigned Opcode = Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5;
	SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),			SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
	N->getOperand(3), getAL(CurDAG, dl),			N->getOperand(3), getAL(CurDAG, dl),
				t.p.northover (inline comment, unsubmitted, not done): The only functional change here appears to be limiting the first block to Thumb2, but the second block is no more correct on Thumb1 machines, so I don't think it actually improves anything.
	CurDAG->getRegister(0, MVT::i32),			CurDAG->getRegister(0, MVT::i32),
	CurDAG->getRegister(0, MVT::i32) };			CurDAG->getRegister(0, MVT::i32) };
	ReplaceNode(N, CurDAG->getMachineNode(			ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
	Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,			Ops));
				t.p.northover (inline comment, unsubmitted, not done): You shouldn't need C++ for this. Patterns embedded in Instructions can't produce more than one value, but by a quirk of notation instantiations of Pat can. So you should be able to write something like `def : Pat<(smlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), (SMLALBB $Rn, $Rm, $RLo, $RHi)>;` (after mapping ARMISD::SMLALBB to smlalbb, of course).
				samparker (author, inline reply, unsubmitted, not done): Ah excellent, I will clean all this up.
	MVT::i32, MVT::i32, Ops));
	return;			return;
	}			}
	}			}
	case ARMISD::SUBE: {			case ARMISD::SUBE: {
	if (!Subtarget->hasV6Ops())			if (!Subtarget->hasV6Ops())
	break;			break;
	// Look for a pattern to match SMMLS			// Look for a pattern to match SMMLS
	// (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))			// (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
	▲ Show 20 Lines • Show All 92 Lines • Show Last 20 Lines

lib/Target/ARM/ARMISelLowering.h

	Show First 20 Lines • Show All 92 Lines • ▼ Show 20 Lines

	// Vector multiply long:			// Vector multiply long:
	VMULLs, // ...signed			VMULLs, // ...signed
	VMULLu, // ...unsigned			VMULLu, // ...unsigned

	UMLAL, // 64bit Unsigned Accumulate Multiply			UMLAL, // 64bit Unsigned Accumulate Multiply
	SMLAL, // 64bit Signed Accumulate Multiply			SMLAL, // 64bit Signed Accumulate Multiply
	UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply			UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
				SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
				SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
				SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
				SMLALTT, // 64-bit signed accumulate multiply top, top 16

	// Operands of the standard BUILD_VECTOR node are not legalized, which			// Operands of the standard BUILD_VECTOR node are not legalized, which
	// is fine if BUILD_VECTORs are always lowered to shuffles or other			// is fine if BUILD_VECTORs are always lowered to shuffles or other
	// operations, but for ARM some BUILD_VECTORs are legal as-is and their			// operations, but for ARM some BUILD_VECTORs are legal as-is and their
	// operands need to be legalized. Define an ARM-specific version of			// operands need to be legalized. Define an ARM-specific version of
	// BUILD_VECTOR for this purpose.			// BUILD_VECTOR for this purpose.
	BUILD_VECTOR,			BUILD_VECTOR,

	▲ Show 20 Lines • Show All 92 Lines • Show Last 20 Lines

lib/Target/ARM/ARMISelLowering.cpp

	Show All 12 Lines
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "ARMBaseInstrInfo.h"			#include "ARMBaseInstrInfo.h"
	#include "ARMBaseRegisterInfo.h"			#include "ARMBaseRegisterInfo.h"
	#include "ARMCallingConv.h"			#include "ARMCallingConv.h"
	#include "ARMConstantPoolValue.h"			#include "ARMConstantPoolValue.h"
	#include "ARMISelLowering.h"			#include "ARMISelLowering.h"
	#include "ARMMachineFunctionInfo.h"			#include "ARMMachineFunctionInfo.h"
				#include "ARMPatternHelpers.h"
	#include "ARMPerfectShuffle.h"			#include "ARMPerfectShuffle.h"
	#include "ARMRegisterInfo.h"			#include "ARMRegisterInfo.h"
	#include "ARMSelectionDAGInfo.h"			#include "ARMSelectionDAGInfo.h"
	#include "ARMSubtarget.h"			#include "ARMSubtarget.h"
	#include "MCTargetDesc/ARMAddressingModes.h"			#include "MCTargetDesc/ARMAddressingModes.h"
	#include "MCTargetDesc/ARMBaseInfo.h"			#include "MCTargetDesc/ARMBaseInfo.h"
	#include "llvm/ADT/APFloat.h"			#include "llvm/ADT/APFloat.h"
	#include "llvm/ADT/APInt.h"			#include "llvm/ADT/APInt.h"
	▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines
	case ARMISD::VTRN: return "ARMISD::VTRN";			case ARMISD::VTRN: return "ARMISD::VTRN";
	case ARMISD::VTBL1: return "ARMISD::VTBL1";			case ARMISD::VTBL1: return "ARMISD::VTBL1";
	case ARMISD::VTBL2: return "ARMISD::VTBL2";			case ARMISD::VTBL2: return "ARMISD::VTBL2";
	case ARMISD::VMULLs: return "ARMISD::VMULLs";			case ARMISD::VMULLs: return "ARMISD::VMULLs";
	case ARMISD::VMULLu: return "ARMISD::VMULLu";			case ARMISD::VMULLu: return "ARMISD::VMULLu";
	case ARMISD::UMAAL: return "ARMISD::UMAAL";			case ARMISD::UMAAL: return "ARMISD::UMAAL";
	case ARMISD::UMLAL: return "ARMISD::UMLAL";			case ARMISD::UMLAL: return "ARMISD::UMLAL";
	case ARMISD::SMLAL: return "ARMISD::SMLAL";			case ARMISD::SMLAL: return "ARMISD::SMLAL";
				case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
				case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
				case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
				case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
	case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";			case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
	case ARMISD::BFI: return "ARMISD::BFI";			case ARMISD::BFI: return "ARMISD::BFI";
	case ARMISD::VORRIMM: return "ARMISD::VORRIMM";			case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
	case ARMISD::VBICIMM: return "ARMISD::VBICIMM";			case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
	case ARMISD::VBSL: return "ARMISD::VBSL";			case ARMISD::VBSL: return "ARMISD::VBSL";
	case ARMISD::MEMCPY: return "ARMISD::MEMCPY";			case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
	case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";			case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
	case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";			case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
	▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines
	// Load are scheduled for latency even if there instruction itinerary			// Load are scheduled for latency even if there instruction itinerary
	// is not available.			// is not available.
	const TargetInstrInfo *TII = Subtarget->getInstrInfo();			const TargetInstrInfo *TII = Subtarget->getInstrInfo();
	const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());			const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

	if (MCID.getNumDefs() == 0)			if (MCID.getNumDefs() == 0)
	return Sched::RegPressure;			return Sched::RegPressure;
	if (!Itins->isEmpty() &&			if (!Itins->isEmpty() &&
	Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)			Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
				efriedma (inline comment, unsubmitted, not done): This is a weird way to use SimplifyDemandedBits; normally you would first generate the ARMISD::SMLALBB node, then separately run SimplifyDemandedBits on all ARMISD::SMLALBB nodes. It composes better with other optimizations (which might expose additional simplifications), and it's more obviously correct (it isn't clear whether N has other users).
	▲ Show 20 Lines • Show All 92 Lines • ▼ Show 20 Lines

	static SDValue findMUL_LOHI(SDValue V) {			static SDValue findMUL_LOHI(SDValue V) {
	if (V->getOpcode() == ISD::UMUL_LOHI \|\|			if (V->getOpcode() == ISD::UMUL_LOHI \|\|
	V->getOpcode() == ISD::SMUL_LOHI)			V->getOpcode() == ISD::SMUL_LOHI)
	return V;			return V;
	return SDValue();			return SDValue();
	}			}

				/// Fold a 64-bit accumulate of a signed 16x16-bit product into a single
				/// ARMISD::SMLAL{BB,BT,TB,TT} node.
				///
				/// The matched shape is
				///   (ADDC (mul a, b), Lo)  glued to  (ADDE (sra (mul a, b), 31), Hi)
				/// i.e. the 32-bit product is added to the low accumulator word and its
				/// sign extension (sra by 31) is added, with carry, to the high word.
				///
				/// \param AddcNode  the ADDC producing the low result word.
				/// \param AddeNode  the ADDE glued to AddcNode, producing the high word.
				/// \param DCI       combiner state; only DCI.DAG is used here.
				/// \param Subtarget used to check that the SMLALxy instructions exist.
				/// \returns SDValue(AddcNode, 0) after rewriting the uses of both adds, or
				///          an empty SDValue when the pattern does not match.
				static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
				                                        TargetLowering::DAGCombinerInfo &DCI,
				                                        const ARMSubtarget *Subtarget) {
				  // Thumb requires the DSP extension, ARM mode requires v5TE.
				  // Review (efriedma): "The .td file says v5TE is required; this says DSP
				  // is required. Which is correct?"
				  // Review (samparker, author): "Yes good point, hasDSP is only the Thumb
				  // check."
				  if (Subtarget->isThumb()) {
				    if (!Subtarget->hasDSP())
				      return SDValue();
				  } else if (!Subtarget->hasV5TEOps())
				    return SDValue();

				  // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
				  // accumulate the product into a 64-bit value. The 16-bit values will
				  // be sign extended somehow or SRA'd into 32-bit values:
				  //   (adde (sra (mul 16bit, 16bit), 31), hi, (addc (mul 16bit, 16bit), lo))

				  // The ADDC adds the product to the LOW accumulator word; accept the
				  // multiply as either operand.
				  SDValue Mul = AddcNode->getOperand(0);
				  SDValue Lo = AddcNode->getOperand(1);
				  if (Mul.getOpcode() != ISD::MUL) {
				    Lo = AddcNode->getOperand(0);
				    Mul = AddcNode->getOperand(1);
				    if (Mul.getOpcode() != ISD::MUL)
				      return SDValue();
				  }

				  // The ADDE adds the sign bits of the product to the HIGH accumulator
				  // word; accept the shift as either operand.
				  SDValue SRA = AddeNode->getOperand(0);
				  SDValue Hi = AddeNode->getOperand(1);
				  if (SRA.getOpcode() != ISD::SRA) {
				    SRA = AddeNode->getOperand(1);
				    Hi = AddeNode->getOperand(0);
				    if (SRA.getOpcode() != ISD::SRA)
				      return SDValue();
				  }

				  // The shift amount must be the constant 31.
				  // Review (t.p.northover): "What if it's not a constant?" - a
				  // non-constant shift amount is rejected here.
				  auto *ShiftAmt = dyn_cast<ConstantSDNode>(SRA.getOperand(1));
				  if (!ShiftAmt || ShiftAmt->getZExtValue() != 31)
				    return SDValue();

				  // The value being sign-extended must be the very multiply that feeds
				  // the ADDC.
				  // Review thread:
				  //   t.p.northover: "What if it's not the same mul as before?"
				  //   samparker (author): "Sorry I don't get your point, what I am
				  //     missing?"
				  //   t.p.northover: "Just knowing that the input to the SRA is some
				  //     multiply operation isn't sufficient. You need to make sure it's
				  //     the same one that produced the low bits."
				  //   samparker (author): "Is this not what I have done? I'm checking
				  //     that the MUL operand of the ADDC is the same operand to the SRA."
				  //   t.p.northover: "Bother, sorry I completely misread it."
				  if (SRA.getOperand(0) != Mul)
				    return SDValue();

				  // Pick the variant from where each multiplicand's halfword lives.
				  // Review (t.p.northover): "Don't you also need to know that the base
				  // type was i16? I don't see any checks here."
				  // NOTE(review): this relies on isBottomS16/isSRA16 (defined outside this
				  // function) to establish that the operands are 16-bit - confirm.
				  SelectionDAG &DAG = DCI.DAG;
				  SDValue MulLHS = Mul.getOperand(0);
				  SDValue MulRHS = Mul.getOperand(1);

				  unsigned Opcode = 0;
				  if (isBottomS16(MulLHS, &DAG) && isBottomS16(MulRHS, &DAG))
				    Opcode = ARMISD::SMLALBB;
				  else if (isBottomS16(MulLHS, &DAG) && isSRA16(MulRHS))
				    Opcode = ARMISD::SMLALBT;
				  else if (isSRA16(MulLHS) && isBottomS16(MulRHS, &DAG))
				    Opcode = ARMISD::SMLALTB;
				  else if (isSRA16(MulLHS) && isSRA16(MulRHS))
				    Opcode = ARMISD::SMLALTT;
				  else
				    return SDValue();

				  SDLoc dl(AddcNode);

				  // Operand order is (Rn, Rm, RLo, RHi); results are (RdLo, RdHi).
				  SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
				                              get16BitVal(MulLHS), get16BitVal(MulRHS),
				                              Lo, Hi);
				  // Replace the ADDs' nodes uses by the MLA node's values.
				  SDValue HiMLALResult(SMLAL.getNode(), 1);
				  SDValue LoMLALResult(SMLAL.getNode(), 0);

				  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
				  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);

				  // Return original node to notify the driver to stop replacing.
				  SDValue resNode(AddcNode, 0);
				  return resNode;
				}

	static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,			static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
	TargetLowering::DAGCombinerInfo &DCI,			TargetLowering::DAGCombinerInfo &DCI,
	const ARMSubtarget *Subtarget) {			const ARMSubtarget *Subtarget) {
	// Look for multiply add opportunities.			// Look for multiply add opportunities.
	// The pattern is a ISD::UMUL_LOHI followed by two add nodes, where			// The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
	// each add nodes consumes a value from ISD::UMUL_LOHI and there is			// each add nodes consumes a value from ISD::UMUL_LOHI and there is
	// a glue link from the first add to the second add.			// a glue link from the first add to the second add.
	// If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by			// If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
	Show All 17 Lines
	assert(AddcNode->getNumValues() == 2 &&			assert(AddcNode->getNumValues() == 2 &&
	AddcNode->getValueType(0) == MVT::i32 &&			AddcNode->getValueType(0) == MVT::i32 &&
	"Expect ADDC with two result values. First: i32");			"Expect ADDC with two result values. First: i32");

	// Check that we have a glued ADDC node.			// Check that we have a glued ADDC node.
	if (AddcNode->getValueType(1) != MVT::Glue)			if (AddcNode->getValueType(1) != MVT::Glue)
	return SDValue();			return SDValue();

	// Check that the ADDC adds the low result of the S/UMUL_LOHI.
	if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
	AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
	AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
	AddcOp1->getOpcode() != ISD::SMUL_LOHI)
	return SDValue();

	// Look for the glued ADDE.			// Look for the glued ADDE.
	SDNode* AddeNode = AddcNode->getGluedUser();			SDNode* AddeNode = AddcNode->getGluedUser();
	if (!AddeNode)			if (!AddeNode)
	return SDValue();			return SDValue();

	// Make sure it is really an ADDE.			// Make sure it is really an ADDE.
	if (AddeNode->getOpcode() != ISD::ADDE)			if (AddeNode->getOpcode() != ISD::ADDE)
	return SDValue();			return SDValue();

	assert(AddeNode->getNumOperands() == 3 &&			assert(AddeNode->getNumOperands() == 3 &&
	AddeNode->getOperand(2).getValueType() == MVT::Glue &&			AddeNode->getOperand(2).getValueType() == MVT::Glue &&
	"ADDE node has the wrong inputs");			"ADDE node has the wrong inputs");

				// Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
				// maybe a SMLAL which multiplies two 16-bit values.
				if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
				AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
				AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
				AddcOp1->getOpcode() != ISD::SMUL_LOHI)
				return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget);

	// Check for the triangle shape.			// Check for the triangle shape.
	SDValue AddeOp0 = AddeNode->getOperand(0);			SDValue AddeOp0 = AddeNode->getOperand(0);
	SDValue AddeOp1 = AddeNode->getOperand(1);			SDValue AddeOp1 = AddeNode->getOperand(1);

	// Make sure that the ADDE operands are not coming from the same node.			// Make sure that the ADDE operands are not coming from the same node.
	if (AddeOp0.getNode() == AddeOp1.getNode())			if (AddeOp0.getNode() == AddeOp1.getNode())
	return SDValue();			return SDValue();

	▲ Show 20 Lines • Show All 143 Lines • ▼ Show 20 Lines
	return SDValue(AddcNode, 0);			return SDValue(AddcNode, 0);
	}			}
	return SDValue();			return SDValue();
	}			}

	/// PerformADDCCombine - Target-specific dag combine transform from			/// PerformADDCCombine - Target-specific dag combine transform from
	/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or			/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or
	/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL			/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
				/// ISD::ADDC, ISD::ADDE and ISD::MUL to SMLAL[B\|T]
	static SDValue PerformADDCCombine(SDNode *N,			static SDValue PerformADDCCombine(SDNode *N,
	TargetLowering::DAGCombinerInfo &DCI,			TargetLowering::DAGCombinerInfo &DCI,
	const ARMSubtarget *Subtarget) {			const ARMSubtarget *Subtarget) {
	if (Subtarget->isThumb1Only()) return SDValue();			if (Subtarget->isThumb1Only()) return SDValue();

	// Only perform the checks after legalize when the pattern is available.			// Only perform the checks after legalize when the pattern is available.
	if (DCI.isBeforeLegalize()) return SDValue();			if (DCI.isBeforeLegalize()) return SDValue();

	▲ Show 20 Lines • Show All 92 Lines • Show Last 20 Lines

lib/Target/ARM/ARMInstrInfo.td

	Show First 20 Lines • Show All 86 Lines • ▼ Show 20 Lines
	// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR			// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR
	def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,			def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
	[SDTCisSameAs<0, 2>,			[SDTCisSameAs<0, 2>,
	SDTCisSameAs<0, 3>,			SDTCisSameAs<0, 3>,
	SDTCisInt<0>,			SDTCisInt<0>,
	SDTCisVT<1, i32>,			SDTCisVT<1, i32>,
	SDTCisVT<4, i32>]>;			SDTCisVT<4, i32>]>;

				// SDT_LongMac - RES0, RES1 = op OP0, OP1, OP2, OP3
				// Two results and four operands, every one of them constrained to i32.
				def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>,
				SDTCisSameAs<0, 1>,
				SDTCisSameAs<0, 2>,
				SDTCisSameAs<0, 3>,
				SDTCisSameAs<0, 4>,
				SDTCisSameAs<0, 5>]>;

	// Node definitions.			// Node definitions.
	def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;			def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
	def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;			def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
	def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntUnaryOp>;			def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntUnaryOp>;

	def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,			def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
	[SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;			[SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
	def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,			def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
	▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines
	[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;			[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

	def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;			def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;

	def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,			def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
	[SDNPHasChain, SDNPInGlue, SDNPOutGlue,			[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
	SDNPMayStore, SDNPMayLoad]>;			SDNPMayStore, SDNPMayLoad]>;

				// DAG nodes for the ARMISD::SMLAL{BB,BT,TB,TT} opcodes. All four use the
				// SDT_LongMac type profile and declare no node properties.
				def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>;
				def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
				def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
				def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// ARM Instruction Predicate Definitions.			// ARM Instruction Predicate Definitions.
	//			//
	def HasV4T : Predicate<"Subtarget->hasV4TOps()">,			def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
	AssemblerPredicate<"HasV4TOps", "armv4t">;			AssemblerPredicate<"HasV4TOps", "armv4t">;
	def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;			def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
	def HasV5T : Predicate<"Subtarget->hasV5TOps()">,			def HasV5T : Predicate<"Subtarget->hasV5TOps()">,
	AssemblerPredicate<"HasV5TOps", "armv5t">;			AssemblerPredicate<"HasV5TOps", "armv5t">;
	▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines
	Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;			Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
	}			}
	}			}

	defm SMUL : AI_smul<"smul">;			defm SMUL : AI_smul<"smul">;
	defm SMLA : AI_smla<"smla">;			defm SMLA : AI_smla<"smla">;

	// Halfword multiply accumulate long: SMLAL<x><y>.			// Halfword multiply accumulate long: SMLAL<x><y>.
	def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),			class SMLAL<bits<2> opc1, string asm>
	(ins GPRnopc:$Rn, GPRnopc:$Rm),			: AMulxyI64<0b0001010, opc1,
	IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,			(outs GPRnopc:$RdLo, GPRnopc:$RdHi),
	Requires<[IsARM, HasV5TE]>,			(ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
	Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;			IIC_iMAC64, asm, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
				RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
	def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),			Requires<[IsARM, HasV5TE]>,
	(ins GPRnopc:$Rn, GPRnopc:$Rm),			Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
	IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
	Requires<[IsARM, HasV5TE]>,			def SMLALBB : SMLAL<0b00, "smlalbb">;
	Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;			def SMLALBT : SMLAL<0b10, "smlalbt">;
				def SMLALTB : SMLAL<0b01, "smlaltb">;
	def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),			def SMLALTT : SMLAL<0b11, "smlaltt">;
	(ins GPRnopc:$Rn, GPRnopc:$Rm),
	IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,			def : Pat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
				t.p.northover (inline comment, unsubmitted, not done): I think these should be ARMPat or they'll still be valid in Thumb mode. You'll also need Thumb patterns to select that variant.
				samparker (author, inline reply, unsubmitted, not done): Cheers!
	Requires<[IsARM, HasV5TE]>,			(SMLALBB $Rn, $Rm, $RLo, $RHi)>;
	Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;			def : Pat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
				(SMLALBT $Rn, $Rm, $RLo, $RHi)>;
	def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),			def : Pat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
	(ins GPRnopc:$Rn, GPRnopc:$Rm),			(SMLALTB $Rn, $Rm, $RLo, $RHi)>;
	IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,			def : Pat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
	Requires<[IsARM, HasV5TE]>,			(SMLALTT $Rn, $Rm, $RLo, $RHi)>;
	Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;

	// Helper class for AI_smld.			// Helper class for AI_smld.
	class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,			class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
	InstrItinClass itin, string opc, string asm>			InstrItinClass itin, string opc, string asm>
	: AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {			: AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
	bits<4> Rn;			bits<4> Rn;
	bits<4> Rm;			bits<4> Rm;
	let Inst{27-23} = 0b01110;			let Inst{27-23} = 0b01110;
	▲ Show 20 Lines • Show All 92 Lines • Show Last 20 Lines

lib/Target/ARM/ARMInstrThumb2.td

	Show First 20 Lines • Show All 92 Lines • ▼ Show 20 Lines
	(t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;			(t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
	def : Thumb2DSPMulPat<(add rGPR:$Ra,			def : Thumb2DSPMulPat<(add rGPR:$Ra,
	(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))),			(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))),
	(t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;			(t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
	def : Thumb2DSPMulPat<(add rGPR:$Ra,			def : Thumb2DSPMulPat<(add rGPR:$Ra,
	(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),			(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
	(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;			(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;

	// Thumb2 instruction class for the SMLAL<x><y> halfword forms: two rGPR
	// outputs ($Ra, $Rd) and two rGPR inputs ($Rn, $Rm), using the IIC_iMAC64
	// itinerary and gated on IsThumb2 + HasDSP.
	// NOTE(review): the `pattern` parameter is accepted but never forwarded; an
	// empty list is always passed to T2FourReg_mac (presumably its pattern
	// slot - confirm against the T2FourReg_mac definition).
	class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern>
	: T2FourReg_mac<1, op22_20, op7_4,
	(outs rGPR:$Ra, rGPR:$Rd),
	(ins rGPR:$Rn, rGPR:$Rm),
	IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
	Requires<[IsThumb2, HasDSP]>;

	// Halfword multiply accumulate long: SMLAL<x><y>			// Halfword multiply accumulate long: SMLAL<x><y>
	def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>;			def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">,
	def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>;			Requires<[IsThumb2, HasDSP]>;
	def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>;			def t2SMLALBT : T2MlaLong<0b100, 0b1001, "smlalbt">,
	def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>;			Requires<[IsThumb2, HasDSP]>;
				def t2SMLALTB : T2MlaLong<0b100, 0b1010, "smlaltb">,
				Requires<[IsThumb2, HasDSP]>;
				def t2SMLALTT : T2MlaLong<0b100, 0b1011, "smlaltt">,
				Requires<[IsThumb2, HasDSP]>;

	class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc>			class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc>
	: T2ThreeReg_mac<0, op22_20, op7_4,			: T2ThreeReg_mac<0, op22_20, op7_4,
	(outs rGPR:$Rd),			(outs rGPR:$Rd),
	(ins rGPR:$Rn, rGPR:$Rm),			(ins rGPR:$Rn, rGPR:$Rm),
	IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>,			IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>,
	Requires<[IsThumb2, HasDSP]> {			Requires<[IsThumb2, HasDSP]> {
	let Inst{15-12} = 0b1111;			let Inst{15-12} = 0b1111;
	▲ Show 20 Lines • Show All 92 Lines • Show Last 20 Lines

lib/Target/ARM/ARMPatternHelpers.h

This file was added.

				//===-- ARMPatternHelpers.h - ARM Instruction Pattern Matching -----------===//
				//
				t.p.northoverUnsubmitted Not Done Reply Inline Actions Names beginning with an underscore and an upper-case letter are reserved in all contexts so you should drop the leading underscore. You'll also need to add a copyright header. Though this whole file becomes irrelevant if you implement the actual selection in .td files and these functions can move to ARMISelLowering.h. t.p.northover: Names beginning with an underscore and an upper-case letter are reserved in all contexts so you…
				samparkerAuthorUnsubmitted Not Done Reply Inline Actions Ok. I'd still want to keep the helpers because they're used for pattern matching of different instructions in DAGToDAG as well. samparker: Ok. I'd still want to keep the helpers because they're used for pattern matching of different…
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				//
				// This file defines functions to identify common patterns used during lowering
				// and instruction selection.
				//
				//===----------------------------------------------------------------------===//

				#ifndef LLVM_LIB_TARGET_ARM_PATTERNS_H
				#define LLVM_LIB_TARGET_ARM_PATTERNS_H

				namespace llvm {

				// Forward declarations so this header is self-contained: both types are
				// only used by reference, pointer, or as a declared return type here.
				class SDValue;
				class SelectionDAG;

				/// Returns true if \p Op is (sra (shl x, 16), 16), or otherwise if \p DAG
				/// reports exactly 17 sign bits for \p Op.
				bool isBottomS16(const SDValue &Op, SelectionDAG *DAG);

				/// Returns true if \p Op is an ISD::SRL by the constant 16.
				bool isSRL16(const SDValue &Op);

				/// Returns true if \p Op is an ISD::SRA by the constant 16.
				bool isSRA16(const SDValue &Op);

				/// Returns true if \p Op is an ISD::SHL by the constant 16.
				bool isSHL16(const SDValue &Op);

				/// Returns \p Op itself for a load, the innermost value of an
				/// (sra (shl x, ...), ...) pair, or operand 0 of \p Op otherwise.
				SDValue get16BitVal(const SDValue &Op);

				} // end namespace llvm

				#endif

lib/Target/ARM/ARMPatternHelpers.cpp

This file was added.

				//===-- ARMPatternHelpers.cpp - ARM Instruction Pattern Matching ---------===//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				efriedmaUnsubmitted Not Done Reply Inline Actions I know you're just moving the code, but it doesn't make sense to check for SIGN_EXTEND here. ARM doesn't have 16-bit integer registers. efriedma: I know you're just moving the code, but it doesn't make sense to check for SIGN_EXTEND here.
				//
				// This file defines functions to identify common patterns used during lowering
				// and instruction selection.
				//
				//===----------------------------------------------------------------------===//

				#include "llvm/CodeGen/ISDOpcodes.h"
				#include "llvm/CodeGen/SelectionDAGNodes.h"
				#include "llvm/CodeGen/SelectionDAG.h"

				namespace llvm {

				/// Returns true when \p Op is a logical right shift (ISD::SRL) whose shift
				/// amount operand is the constant 16.
				bool isSRL16(const SDValue &Op) {
				  if (Op.getOpcode() == ISD::SRL)
				    if (const auto *ShAmt = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
				      return ShAmt->getZExtValue() == 16;
				  // Either not an SRL, or the shift amount is not a constant.
				  return false;
				}

				/// Returns true when \p Op is an arithmetic right shift (ISD::SRA) whose
				/// shift amount operand is the constant 16.
				bool isSRA16(const SDValue &Op) {
				  if (Op.getOpcode() == ISD::SRA)
				    if (const auto *ShAmt = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
				      return ShAmt->getZExtValue() == 16;
				  // Either not an SRA, or the shift amount is not a constant.
				  return false;
				}

				/// Returns true when \p Op is a left shift (ISD::SHL) whose shift amount
				/// operand is the constant 16.
				bool isSHL16(const SDValue &Op) {
				  if (Op.getOpcode() == ISD::SHL)
				    if (const auto *ShAmt = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
				      return ShAmt->getZExtValue() == 16;
				  // Either not an SHL, or the shift amount is not a constant.
				  return false;
				}

				/// Returns true if \p Op is an explicit (sra (shl x, 16), 16) pair, or, for
				/// any other node, if \p CurDAG reports exactly 17 sign bits for it.
				bool isBottomS16(const SDValue &Op, SelectionDAG *CurDAG) {
				  // For an SRA-by-16 the answer depends solely on its input being an
				  // SHL-by-16; the sign-bit query is only consulted for other nodes.
				  return isSRA16(Op) ? isSHL16(Op.getOperand(0))
				                     : CurDAG->ComputeNumSignBits(Op) == 17;
				}

				/// Picks the value underlying \p Op:
				///  - a load is returned unchanged;
				///  - for (sra (shl x, ...), ...), x is returned;
				///  - otherwise operand 0 of \p Op is returned.
				/// NOTE(review): the final case takes operand 0 without checking what kind
				/// of node \p Op is; presumably callers have already validated \p Op (e.g.
				/// via isBottomS16) - confirm.
				SDValue get16BitVal(const SDValue &Op) {
				  const unsigned Opcode = Op.getOpcode();
				  if (Opcode == ISD::LOAD)
				    return Op;

				  const SDValue Inner = Op.getOperand(0);
				  if (Opcode == ISD::SRA && Inner.getOpcode() == ISD::SHL)
				    return Inner.getOperand(0);

				  return Inner;
				}
				t.p.northoverUnsubmitted Not Done Reply Inline Actions This combination looks extremely dodgy to me. ComputeNumSignBits makes no promises about how it's going to find those bits, and in fact traverses nodes down to a depth of 6. You've already found two cases where the final SDNode's operand(0) isn't actually the unextended node, I see no reason to think that's all there is. t.p.northover: This combination looks extremely dodgy to me. ComputeNumSignBits makes no promises about how…
				samparkerAuthorUnsubmitted Not Done Reply Inline Actions Ah yes... Do you think it's worth adding a function to SelectionDAG that returns the SDValue that ComputeNumSignBits finds and uses for its calculation? samparker: Ah yes... Do you think it's worth adding a function to SelectionDAG that returns the SDValue…
				efriedmaUnsubmitted Not Done Reply Inline Actions The function you want already exists; it's called SimplifyDemandedBits. efriedma: The function you want already exists; it's called SimplifyDemandedBits.
				samparkerAuthorUnsubmitted Not Done Reply Inline Actions Great, I'll look into it. Thanks samparker: Great, I'll look into it. Thanks

				}

lib/Target/ARM/CMakeLists.txt

Show All 40 Lines	add_llvm_target(ARMCodeGen
ARMFrameLowering.cpp		ARMFrameLowering.cpp
ARMHazardRecognizer.cpp		ARMHazardRecognizer.cpp
ARMISelDAGToDAG.cpp		ARMISelDAGToDAG.cpp
ARMISelLowering.cpp		ARMISelLowering.cpp
ARMInstrInfo.cpp		ARMInstrInfo.cpp
ARMLoadStoreOptimizer.cpp		ARMLoadStoreOptimizer.cpp
ARMMCInstLower.cpp		ARMMCInstLower.cpp
ARMMachineFunctionInfo.cpp		ARMMachineFunctionInfo.cpp
		ARMPatternHelpers.cpp
ARMRegisterInfo.cpp		ARMRegisterInfo.cpp
ARMOptimizeBarriersPass.cpp		ARMOptimizeBarriersPass.cpp
ARMSelectionDAGInfo.cpp		ARMSelectionDAGInfo.cpp
ARMSubtarget.cpp		ARMSubtarget.cpp
ARMTargetMachine.cpp		ARMTargetMachine.cpp
ARMTargetObjectFile.cpp		ARMTargetObjectFile.cpp
ARMTargetTransformInfo.cpp		ARMTargetTransformInfo.cpp
MLxExpansionPass.cpp		MLxExpansionPass.cpp
Show All 15 Lines

test/CodeGen/ARM/longMAC.ll

	; RUN: llc -mtriple=arm-eabi %s -o - \| FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-LE			; RUN: llc -mtriple=arm-eabi %s -o - \| FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-LE
	; RUN: llc -mtriple=armv7-eabi %s -o - \| FileCheck %s --check-prefix=CHECK-V7-LE			; RUN: llc -mtriple=armv7-eabi %s -o - \| FileCheck %s --check-prefix=CHECK-V7-LE
	; RUN: llc -mtriple=armeb-eabi %s -o - \| FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE			; RUN: llc -mtriple=armeb-eabi %s -o - \| FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
	; RUN: llc -mtriple=armebv7-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V7-BE			; RUN: llc -mtriple=armebv7-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V7-BE
	; RUN: llc -mtriple=thumbv6-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V6-THUMB			; RUN: llc -mtriple=thumbv6-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V6-THUMB
	; RUN: llc -mtriple=thumbv6t2-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V6-THUMB2			; RUN: llc -mtriple=thumbv6t2-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-T2-DSP
	; RUN: llc -mtriple=thumbv7-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V7-THUMB			; RUN: llc -mtriple=thumbv7-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-T2-DSP
	; RUN: llc -mtriple=thumbebv7-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V7-THUMB-BE			; RUN: llc -mtriple=thumbebv7-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V7-THUMB-BE
	; RUN: llc -mtriple=thumbv6m-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V6M-THUMB			; RUN: llc -mtriple=thumbv6m-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V6M-THUMB
	; RUN: llc -mtriple=thumbv7m-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V7M-THUMB			; RUN: llc -mtriple=thumbv7m-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V7M-THUMB
	; RUN: llc -mtriple=thumbv7em-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V7EM-THUMB			; RUN: llc -mtriple=thumbv7em-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-T2-DSP
				; RUN: llc -mtriple=armv5te-eabi %s -o - \| FileCheck %s -check-prefix=CHECK-V5TE
	; Check generated signed and unsigned multiply accumulate long.			; Check generated signed and unsigned multiply accumulate long.

	define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {			define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
	;CHECK-LABEL: MACLongTest1:			;CHECK-LABEL: MACLongTest1:
	;CHECK-V6-THUMB-NOT: umlal			;CHECK-V6-THUMB-NOT: umlal
	;CHECK-LE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-LE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-LE: mov r0, [[RDLO]]			;CHECK-LE: mov r0, [[RDLO]]
	;CHECK-LE: mov r1, [[RDHI]]			;CHECK-LE: mov r1, [[RDHI]]
	;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-BE: mov r0, [[RDHI]]			;CHECK-BE: mov r0, [[RDHI]]
	;CHECK-BE: mov r1, [[RDLO]]			;CHECK-BE: mov r1, [[RDLO]]
	;CHECK-V6-THUMB2: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-T2-DSP: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V6-THUMB2: mov r0, [[RDLO]]			;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]]
	;CHECK-V6-THUMB2: mov r1, [[RDHI]]			;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]]
	;CHECK-V7-THUMB: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-THUMB: mov r0, [[RDLO]]
	;CHECK-V7-THUMB: mov r1, [[RDHI]]
	;CHECK-V7-THUMB-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-V7-THUMB-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]			;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
	;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]			;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
	%conv = zext i32 %a to i64			%conv = zext i32 %a to i64
	%conv1 = zext i32 %b to i64			%conv1 = zext i32 %b to i64
	%mul = mul i64 %conv1, %conv			%mul = mul i64 %conv1, %conv
	%add = add i64 %mul, %c			%add = add i64 %mul, %c
	ret i64 %add			ret i64 %add
	}			}

	define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) {			define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) {
	;CHECK-LABEL: MACLongTest2:			;CHECK-LABEL: MACLongTest2:
	;CHECK-LE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-LE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-LE: mov r0, [[RDLO]]			;CHECK-LE: mov r0, [[RDLO]]
	;CHECK-LE: mov r1, [[RDHI]]			;CHECK-LE: mov r1, [[RDHI]]
	;CHECK-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-BE: mov r0, [[RDHI]]			;CHECK-BE: mov r0, [[RDHI]]
	;CHECK-BE: mov r1, [[RDLO]]			;CHECK-BE: mov r1, [[RDLO]]
	;CHECK-V6-THUMB2: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-T2-DSP: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V6-THUMB2: mov r0, [[RDLO]]			;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]]
	;CHECK-V6-THUMB2: mov r1, [[RDHI]]			;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]]
	;CHECK-V7-THUMB: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-THUMB: mov r0, [[RDLO]]
	;CHECK-V7-THUMB: mov r1, [[RDHI]]
	;CHECK-V7-THUMB-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-V7-THUMB-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]			;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
	;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]			;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
	%conv = sext i32 %a to i64			%conv = sext i32 %a to i64
	%conv1 = sext i32 %b to i64			%conv1 = sext i32 %b to i64
	%mul = mul nsw i64 %conv1, %conv			%mul = mul nsw i64 %conv1, %conv
	%add = add nsw i64 %mul, %c			%add = add nsw i64 %mul, %c
	ret i64 %add			ret i64 %add
	Show All 12 Lines
	;CHECK-LE: mov [[RDHI:r[0-9]+]], #0			;CHECK-LE: mov [[RDHI:r[0-9]+]], #0
	;CHECK-LE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0			;CHECK-LE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0
	;CHECK-LE: mov r0, [[RDLO]]			;CHECK-LE: mov r0, [[RDLO]]
	;CHECK-LE: mov r1, [[RDHI]]			;CHECK-LE: mov r1, [[RDHI]]
	;CHECK-BE: mov [[RDHI:r[0-9]+]], #0			;CHECK-BE: mov [[RDHI:r[0-9]+]], #0
	;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0			;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0
	;CHECK-BE: mov r0, [[RDHI]]			;CHECK-BE: mov r0, [[RDHI]]
	;CHECK-BE: mov r1, [[RDLO]]			;CHECK-BE: mov r1, [[RDLO]]
	;CHECK-V6-THUMB2: umlal			;CHECK-T2-DSP: umlal
	;CHECK-V7-THUMB: umlal
	;CHECK-V6-THUMB-NOT: umlal			;CHECK-V6-THUMB-NOT: umlal
	%conv = zext i32 %b to i64			%conv = zext i32 %b to i64
	%conv1 = zext i32 %a to i64			%conv1 = zext i32 %a to i64
	%mul = mul i64 %conv, %conv1			%mul = mul i64 %conv, %conv1
	%conv2 = zext i32 %c to i64			%conv2 = zext i32 %c to i64
	%add = add i64 %mul, %conv2			%add = add i64 %mul, %conv2
	ret i64 %add			ret i64 %add
	}			}

	define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {			define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
	;CHECK-LABEL: MACLongTest4:			;CHECK-LABEL: MACLongTest4:
	;CHECK-V6-THUMB-NOT: smlal			;CHECK-V6-THUMB-NOT: smlal
	;CHECK-V6-THUMB2: smlal			;CHECK-T2-DSP: smlal
	;CHECK-V7-THUMB: smlal
	;CHECK-LE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31			;CHECK-LE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31
	;CHECK-LE: smlal [[RDLO]], [[RDHI]], r1, r0			;CHECK-LE: smlal [[RDLO]], [[RDHI]], r1, r0
	;CHECK-LE: mov r0, [[RDLO]]			;CHECK-LE: mov r0, [[RDLO]]
	;CHECK-LE: mov r1, [[RDHI]]			;CHECK-LE: mov r1, [[RDHI]]
	;CHECK-BE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31			;CHECK-BE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31
	;CHECK-BE: smlal [[RDLO]], [[RDHI]], r1, r0			;CHECK-BE: smlal [[RDLO]], [[RDHI]], r1, r0
	;CHECK-BE: mov r0, [[RDHI]]			;CHECK-BE: mov r0, [[RDHI]]
	;CHECK-BE: mov r1, [[RDLO]]			;CHECK-BE: mov r1, [[RDLO]]
	%conv = sext i32 %b to i64			%conv = sext i32 %b to i64
	%conv1 = sext i32 %a to i64			%conv1 = sext i32 %a to i64
	%mul = mul nsw i64 %conv, %conv1			%mul = mul nsw i64 %conv, %conv1
	%conv2 = sext i32 %c to i64			%conv2 = sext i32 %c to i64
	%add = add nsw i64 %mul, %conv2			%add = add nsw i64 %mul, %conv2
	ret i64 %add			ret i64 %add
	}			}

	define i64 @MACLongTest6(i32 %a, i32 %b, i32 %c, i32 %d) {			define i64 @MACLongTest6(i32 %a, i32 %b, i32 %c, i32 %d) {
	;CHECK-LABEL: MACLongTest6:			;CHECK-LABEL: MACLongTest6:
	;CHECK-V6-THUMB-NOT: smull			;CHECK-V6-THUMB-NOT: smull
	;CHECK-V6-THUMB-NOT: smlal			;CHECK-V6-THUMB-NOT: smlal
	;CHECK: smull r12, lr, r1, r0			;CHECK: smull r12, lr, r1, r0
	;CHECK: smlal r12, lr, r3, r2			;CHECK: smlal r12, lr, r3, r2
	;CHECK-V7: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0			;CHECK-V7: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
	;CHECK-V7: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]			;CHECK-V7: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
	;CHECK-V7-THUMB: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0			;CHECK-T2-DSP: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
	;CHECK-V7-THUMB: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]			;CHECK-T2-DSP: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
	;CHECK-V6-THUMB2: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
	;CHECK-V6-THUMB2: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
	%conv = sext i32 %a to i64			%conv = sext i32 %a to i64
	%conv1 = sext i32 %b to i64			%conv1 = sext i32 %b to i64
	%mul = mul nsw i64 %conv1, %conv			%mul = mul nsw i64 %conv1, %conv
	%conv2 = sext i32 %c to i64			%conv2 = sext i32 %c to i64
	%conv3 = sext i32 %d to i64			%conv3 = sext i32 %d to i64
	%mul4 = mul nsw i64 %conv3, %conv2			%mul4 = mul nsw i64 %conv3, %conv2
	%add = add nsw i64 %mul4, %mul			%add = add nsw i64 %mul4, %mul
	ret i64 %add			ret i64 %add
	Show All 34 Lines
	define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {			define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
	;CHECK-LABEL: MACLongTest9:			;CHECK-LABEL: MACLongTest9:
	;CHECK-V7-LE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-V7-LE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-LE: mov r0, [[RDLO]]			;CHECK-V7-LE: mov r0, [[RDLO]]
	;CHECK-V7-LE: mov r1, [[RDHI]]			;CHECK-V7-LE: mov r1, [[RDHI]]
	;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-BE: mov r0, [[RDHI]]			;CHECK-V7-BE: mov r0, [[RDHI]]
	;CHECK-V7-BE: mov r1, [[RDLO]]			;CHECK-V7-BE: mov r1, [[RDLO]]
	;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-T2-DSP: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V6-THUMB2: mov r0, [[RDLO]]			;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]]
	;CHECK-V6-THUMB2: mov r1, [[RDHI]]			;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]]
	;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-THUMB: mov r0, [[RDLO]]
	;CHECK-V7-THUMB: mov r1, [[RDHI]]
	;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]			;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
	;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]			;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
	;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
	;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
	;CHECK-NOT:umaal			;CHECK-NOT:umaal
	;CHECK-V6-THUMB-NOT: umaal			;CHECK-V6-THUMB-NOT: umaal
	;CHECK-V6M-THUMB-NOT: umaal			;CHECK-V6M-THUMB-NOT: umaal
	;CHECK-V7M-THUMB-NOT: umaal			;CHECK-V7M-THUMB-NOT: umaal
	%conv = zext i32 %lhs to i64			%conv = zext i32 %lhs to i64
	%conv1 = zext i32 %rhs to i64			%conv1 = zext i32 %rhs to i64
	%mul = mul nuw i64 %conv1, %conv			%mul = mul nuw i64 %conv1, %conv
	%conv2 = zext i32 %lo to i64			%conv2 = zext i32 %lo to i64
	%add = add i64 %mul, %conv2			%add = add i64 %mul, %conv2
	%conv3 = zext i32 %hi to i64			%conv3 = zext i32 %hi to i64
	%add2 = add i64 %add, %conv3			%add2 = add i64 %add, %conv3
	ret i64 %add2			ret i64 %add2
	}			}

	define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {			define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
	;CHECK-LABEL: MACLongTest10:			;CHECK-LABEL: MACLongTest10:
	;CHECK-V7-LE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-V7-LE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-LE: mov r0, [[RDLO]]			;CHECK-V7-LE: mov r0, [[RDLO]]
	;CHECK-V7-LE: mov r1, [[RDHI]]			;CHECK-V7-LE: mov r1, [[RDHI]]
	;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-BE: mov r0, [[RDHI]]			;CHECK-V7-BE: mov r0, [[RDHI]]
	;CHECK-V7-BE: mov r1, [[RDLO]]			;CHECK-V7-BE: mov r1, [[RDLO]]
	;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-T2-DSP: umaal r2, r3, r1, r0
	;CHECK-V6-THUMB2: mov r0, [[RDLO]]			;CHECK-T2-DSP-NEXT: mov r0, r2
	;CHECK-V6-THUMB2: mov r1, [[RDHI]]			;CHECK-T2-DSP-NEXT: mov r1, r3
	;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-THUMB: mov r0, [[RDLO]]
	;CHECK-V7-THUMB: mov r1, [[RDHI]]
	;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]			;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]			;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
	;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]			;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
	;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
	;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
	;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
	;CHECK-NOT:umaal			;CHECK-NOT:umaal
	;CHECK-V6-THUMB-NOT:umaal			;CHECK-V6-THUMB-NOT:umaal
	;CHECK-V6M-THUMB-NOT: umaal			;CHECK-V6M-THUMB-NOT: umaal
	;CHECK-V7M-THUMB-NOT: umaal			;CHECK-V7M-THUMB-NOT: umaal
	%conv = zext i32 %lhs to i64			%conv = zext i32 %lhs to i64
	%conv1 = zext i32 %rhs to i64			%conv1 = zext i32 %rhs to i64
	%mul = mul nuw i64 %conv1, %conv			%mul = mul nuw i64 %conv1, %conv
	%conv2 = zext i32 %lo to i64			%conv2 = zext i32 %lo to i64
	%conv3 = zext i32 %hi to i64			%conv3 = zext i32 %hi to i64
	%add = add i64 %conv2, %conv3			%add = add i64 %conv2, %conv3
	%add2 = add i64 %add, %mul			%add2 = add i64 %add, %mul
	ret i64 %add2			ret i64 %add2
	}			}

				; Both factors are i16 arguments sign-extended to i32; the i32 product is
				; sign-extended to i64 and added to the i64 accumulator %c. The T2-DSP and
				; V7-THUMB-BE run lines expect an smlalbb followed by the two result moves.
				define i64 @MACLongTest11(i16 %a, i16 %b, i64 %c) {
				;CHECK-LABEL: MACLongTest11:
				;CHECK-LE-NOT: smlalbb
				;CHECK-BE-NOT: smlalbb
				;CHECK-V6M-THUMB-NOT: smlalbb
				;CHECK-V7M-THUMB-NOT: smlalbb
				;CHECK-T2-DSP: smlalbb r3, r2,
				;CHECK-T2-DSP-NEXT: mov r0, r3
				;CHECK-T2-DSP-NEXT: mov r1, r2
				;CHECK-V7-THUMB-BE: smlalbb r2, r3
				;CHECK-V7-THUMB-BE-NEXT: mov r0, r3
				;CHECK-V7-THUMB-BE-NEXT: mov r1, r2
				%conv = sext i16 %a to i32
				%conv1 = sext i16 %b to i32
				%mul = mul nsw i32 %conv1, %conv
				%conv2 = sext i32 %mul to i64
				%add = add nsw i64 %conv2, %c
				ret i64 %add
				}

				; First factor is a sign-extended i16 argument, second is the top half of
				; %t (ashr by 16); the sign-extended product is added to the i64 accumulator
				; %c. The T2-DSP, V5TE and V7-THUMB-BE run lines expect smlalbt.
				define i64 @MACLongTest12(i16 %b, i32 %t, i64 %c) {
				;CHECK-LABEL: MACLongTest12:
				;CHECK-LE-NOT: smlalbt
				;CHECK-BE-NOT: smlalbt
				;CHECK-V6M-THUMB-NOT: smlalbt
				;CHECK-V7M-THUMB-NOT: smlalbt
				;CHECK-T2-DSP: smlalbt r3, r2, r0, r1
				;CHECK-T2-DSP-NEXT: mov r0, r3
				;CHECK-T2-DSP-NEXT: mov r1, r2
				;CHECK-V5TE: smlalbt r3, r2, r0, r1
				efriedmaUnsubmitted Not Done Reply Inline Actions The "CHECK-V6-THUMB2-NOT" aren't that useful; can you instead use CHECK-NEXT to check that there aren't any extra instructions in the function? You might as well just use explicit register names here; given the calling convention is known, there's only one possible register assignment. efriedma: The "CHECK-V6-THUMB2-NOT" aren't that useful; can you instead use CHECK-NEXT to check that…
				samparkerAuthorUnsubmitted Not Done Reply Inline Actions I will change to explicit registers names, but I would like to keep the extra NOT checks because it caught me when I had originally lowered the operands incorrectly, which left the sxth and shifts in. samparker: I will change to explicit registers names, but I would like to keep the extra NOT checks…
				samparkerAuthorUnsubmitted Not Done Reply Inline Actions Sorry, I now understand your comment! I will make the changes samparker: Sorry, I now understand your comment! I will make the changes
				;CHECK-V5TE-NEXT: mov r0, r3
				;CHECK-V5TE-NEXT: mov r1, r2
				;CHECK-V7-THUMB-BE: smlalbt r2, r3,
				;CHECK-V7-THUMB-BE-NEXT: mov r0, r3
				;CHECK-V7-THUMB-BE-NEXT: mov r1, r2
				%conv0 = sext i16 %b to i32
				%conv1 = ashr i32 %t, 16
				%mul = mul nsw i32 %conv0, %conv1
				%conv2 = sext i32 %mul to i64
				%add = add nsw i64 %conv2, %c
				ret i64 %add
				}

				; First factor is the top half of %t (ashr by 16), second is a sign-extended
				; i16 argument; the sign-extended product is added to the i64 accumulator
				; %c. The T2-DSP, V5TE and V7-THUMB-BE run lines expect smlaltb.
				define i64 @MACLongTest13(i32 %t, i16 %b, i64 %c) {
				;CHECK-LABEL: MACLongTest13:
				;CHECK-LE-NOT: smlaltb
				;CHECK-BE-NOT: smlaltb
				;CHECK-V6M-THUMB-NOT: smlaltb
				;CHECK-V7M-THUMB-NOT: smlaltb
				;CHECK-T2-DSP: smlaltb r3, r2, r0, r1
				;CHECK-T2-DSP-NEXT: mov r0, r3
				;CHECK-T2-DSP-NEXT: mov r1, r2
				;CHECK-V5TE: smlaltb r3, r2, r0, r1
				;CHECK-V5TE-NEXT: mov r0, r3
				;CHECK-V5TE-NEXT: mov r1, r2
				;CHECK-V7-THUMB-BE: smlaltb r2, r3, r0, r1
				;CHECK-V7-THUMB-BE-NEXT: mov r0, r3
				;CHECK-V7-THUMB-BE-NEXT: mov r1, r2
				%conv0 = ashr i32 %t, 16
				%conv1= sext i16 %b to i32
				%mul = mul nsw i32 %conv0, %conv1
				%conv2 = sext i32 %mul to i64
				%add = add nsw i64 %conv2, %c
				ret i64 %add
				}

				; Both factors are the top halves of i32 arguments (ashr by 16); the
				; sign-extended product is added to the i64 accumulator %c. The T2-DSP, V5TE
				; and V7-THUMB-BE run lines expect smlaltt.
				define i64 @MACLongTest14(i32 %a, i32 %b, i64 %c) {
				;CHECK-LABEL: MACLongTest14:
				;CHECK-LE-NOT: smlaltt
				;CHECK-BE-NOT: smlaltt
				;CHECK-V6M-THUMB-NOT: smlaltt
				;CHECK-V7M-THUMB-NOT: smlaltt
				;CHECK-T2-DSP: smlaltt r3, r2,
				;CHECK-T2-DSP-NEXT: mov r0, r3
				;CHECK-T2-DSP-NEXT: mov r1, r2
				;CHECK-V5TE: smlaltt r3, r2,
				;CHECK-V5TE-NEXT: mov r0, r3
				;CHECK-V5TE-NEXT: mov r1, r2
				;CHECK-V7-THUMB-BE: smlaltt r2, r3,
				;CHECK-V7-THUMB-BE-NEXT: mov r0, r3
				;CHECK-V7-THUMB-BE-NEXT: mov r1, r2
				%conv0 = ashr i32 %a, 16
				%conv1 = ashr i32 %b, 16
				%mul = mul nsw i32 %conv1, %conv0
				%conv2 = sext i32 %mul to i64
				%add = add nsw i64 %conv2, %c
				ret i64 %add
				}

				@global_b = external global i16, align 2
				; The bottom-half factor comes from a sign-extended i16 load of @global_b
				; and is the second mul operand; the first is the top half of %t (ashr by
				; 16). The T2-DSP and V5TE run lines expect smlaltb.
				;CHECK-LABEL: MACLongTest15
				;CHECK-LE-NOT: smlaltb
				;CHECK-BE-NOT: smlaltb
				;CHECK-V6M-THUMB-NOT: smlaltb
				;CHECK-V7M-THUMB-NOT: smlaltb
				;CHECK-T2-DSP: smlaltb r3, r2, r0, r1
				;CHECK-T2-DSP-NEXT: mov r0, r3
				;CHECK-T2-DSP-NEXT: mov r1, r2
				;CHECK-V5TE: smlaltb r3, r2, r0, r1
				;CHECK-V5TE-NEXT: mov r0, r3
				;CHECK-V5TE-NEXT: mov r1, r2
				define i64 @MACLongTest15(i32 %t, i64 %acc) {
				entry:
				%0 = load i16, i16* @global_b, align 2
				%conv = sext i16 %0 to i32
				%shr = ashr i32 %t, 16
				%mul = mul nsw i32 %shr, %conv
				%conv1 = sext i32 %mul to i64
				%add = add nsw i64 %conv1, %acc
				ret i64 %add
				}

				; Same inputs as MACLongTest15 but with the mul operands swapped: the
				; sign-extended i16 load of @global_b is the first operand and the top half
				; of %t (ashr by 16) the second. The T2-DSP and V5TE run lines expect
				; smlalbt.
				;CHECK-LABEL: MACLongTest16
				;CHECK-LE-NOT: smlalbt
				;CHECK-BE-NOT: smlalbt
				;CHECK-V6M-THUMB-NOT: smlalbt
				;CHECK-V7M-THUMB-NOT: smlalbt
				;CHECK-T2-DSP: smlalbt r3, r2, r1, r0
				;CHECK-T2-DSP-NEXT: mov r0, r3
				;CHECK-T2-DSP-NEXT: mov r1, r2
				;CHECK-V5TE: smlalbt r3, r2, r1, r0
				;CHECK-V5TE-NEXT: mov r0, r3
				;CHECK-V5TE-NEXT: mov r1, r2
				define i64 @MACLongTest16(i32 %t, i64 %acc) {
				entry:
				%0 = load i16, i16* @global_b, align 2
				%conv = sext i16 %0 to i32
				%shr = ashr i32 %t, 16
				%mul = mul nsw i32 %conv, %shr
				%conv1 = sext i32 %mul to i64
				%add = add nsw i64 %conv1, %acc
				ret i64 %add
				}

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Enable SMLAL[B|T] instruction selection
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 89096

lib/Target/ARM/ARMISelDAGToDAG.cpp

lib/Target/ARM/ARMISelLowering.h

lib/Target/ARM/ARMISelLowering.cpp

lib/Target/ARM/ARMInstrInfo.td

lib/Target/ARM/ARMInstrThumb2.td

lib/Target/ARM/ARMPatternHelpers.h

lib/Target/ARM/ARMPatternHelpers.cpp

lib/Target/ARM/CMakeLists.txt

test/CodeGen/ARM/longMAC.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Enable SMLAL[B|T] instruction selectionClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 89096

lib/Target/ARM/ARMISelDAGToDAG.cpp

lib/Target/ARM/ARMISelLowering.h

lib/Target/ARM/ARMISelLowering.cpp

lib/Target/ARM/ARMInstrInfo.td

lib/Target/ARM/ARMInstrThumb2.td

lib/Target/ARM/ARMPatternHelpers.h

lib/Target/ARM/ARMPatternHelpers.cpp

lib/Target/ARM/CMakeLists.txt

test/CodeGen/ARM/longMAC.ll

[ARM] Enable SMLAL[B|T] instruction selection
ClosedPublic