diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -101,14 +101,14 @@
 char IRTranslator::ID = 0;
 
 INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
-                false, false)
+                      false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(StackProtector)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
-                false, false)
+                    false, false)
 
 static void reportTranslationError(MachineFunction &MF,
                                    const TargetPassConfig &TPC,
@@ -167,7 +167,6 @@
 } // namespace
 #endif // ifndef NDEBUG
 
-
 void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<StackProtector>();
   AU.addRequired<TargetPassConfig>();
@@ -210,8 +209,7 @@
   auto *VRegs = VMap.getVRegs(Val);
   auto *Offsets = VMap.getOffsets(Val);
 
-  assert(Val.getType()->isSized() &&
-         "Don't know how to create an empty vreg");
+  assert(Val.getType()->isSized() && "Don't know how to create an empty vreg");
 
   SmallVector<LLT, 4> SplitTys;
   computeValueLLTs(*DL, *Val.getType(), SplitTys,
@@ -334,14 +332,14 @@
   Register Op0 = getOrCreateVReg(*U.getOperand(0));
   Register Op1 = getOrCreateVReg(*U.getOperand(1));
   Register Res = getOrCreateVReg(U);
-  CmpInst::Predicate Pred =
-      CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
-                                    cast<ConstantExpr>(U).getPredicate());
+  CmpInst::Predicate Pred = CI ? CI->getPredicate()
+                               : static_cast<CmpInst::Predicate>(
+                                     cast<ConstantExpr>(U).getPredicate());
   if (CmpInst::isIntPredicate(Pred))
     MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
   else if (Pred == CmpInst::FCMP_FALSE)
-    MIRBuilder.buildCopy(
-        Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
+    MIRBuilder.buildCopy(Res,
+                         getOrCreateVReg(*Constant::getNullValue(U.getType())));
   else if (Pred == CmpInst::FCMP_TRUE)
     MIRBuilder.buildCopy(
         Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
@@ -861,8 +859,8 @@
   assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
          "Can only handle SLE ranges");
 
-  const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
-  const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+  const APInt &Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+  const APInt &High = cast<ConstantInt>(CB.CmpRHS)->getValue();
 
   Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
   if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
@@ -897,16 +895,12 @@
   MIB.setDebugLoc(OldDbgLoc);
 }
 
-bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
-                                          MachineBasicBlock *SwitchMBB,
-                                          MachineBasicBlock *CurMBB,
-                                          MachineBasicBlock *DefaultMBB,
-                                          MachineIRBuilder &MIB,
-                                          MachineFunction::iterator BBI,
-                                          BranchProbability UnhandledProbs,
-                                          SwitchCG::CaseClusterIt I,
-                                          MachineBasicBlock *Fallthrough,
-                                          bool FallthroughUnreachable) {
+bool IRTranslator::lowerJumpTableWorkItem(
+    SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+    MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+    MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+    BranchProbability UnhandledProbs, SwitchCG::CaseClusterIt I,
+    MachineBasicBlock *Fallthrough, bool FallthroughUnreachable) {
   using namespace SwitchCG;
   MachineFunction *CurMF = SwitchMBB->getParent();
   // FIXME: Optimize away range check based on pivot comparisons.
@@ -968,14 +962,11 @@
   }
   return true;
 }
 
-bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
-                                            Value *Cond,
-                                            MachineBasicBlock *Fallthrough,
-                                            bool FallthroughUnreachable,
-                                            BranchProbability UnhandledProbs,
-                                            MachineBasicBlock *CurMBB,
-                                            MachineIRBuilder &MIB,
-                                            MachineBasicBlock *SwitchMBB) {
+bool IRTranslator::lowerSwitchRangeWorkItem(
+    SwitchCG::CaseClusterIt I, Value *Cond, MachineBasicBlock *Fallthrough,
+    bool FallthroughUnreachable, BranchProbability UnhandledProbs,
+    MachineBasicBlock *CurMBB, MachineIRBuilder &MIB,
+    MachineBasicBlock *SwitchMBB) {
   using namespace SwitchCG;
   const Value *RHS, *LHS, *MHS;
   CmpInst::Predicate Pred;
@@ -1553,8 +1544,10 @@
       LLT IdxTy = MRI->getType(IdxReg);
       if (IdxTy != OffsetTy) {
         if (!IdxTy.isVector() && WantSplatVector) {
-          IdxReg = MIRBuilder.buildSplatVector(
-              OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
+          IdxReg =
+              MIRBuilder
+                  .buildSplatVector(OffsetTy.changeElementType(IdxTy), IdxReg)
+                  .getReg(0);
         }
 
         IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
@@ -1576,8 +1569,7 @@
   }
 
   if (Offset != 0) {
-    auto OffsetMIB =
-        MIRBuilder.buildConstant(OffsetTy, Offset);
+    auto OffsetMIB = MIRBuilder.buildConstant(OffsetTy, Offset);
     MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
     return true;
   }
@@ -1716,109 +1708,109 @@
   Register Src0 = getOrCreateVReg(*CI.getOperand(0));
   Register Src1 = getOrCreateVReg(*CI.getOperand(1));
   uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
-  MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
+  MIRBuilder.buildInstr(Op, {Dst}, {Src0, Src1, Scale});
   return true;
 }
 
 unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
   switch (ID) {
-    default:
-      break;
-    case Intrinsic::bswap:
-      return TargetOpcode::G_BSWAP;
-    case Intrinsic::bitreverse:
-      return TargetOpcode::G_BITREVERSE;
-    case Intrinsic::fshl:
-      return TargetOpcode::G_FSHL;
-    case Intrinsic::fshr:
-      return TargetOpcode::G_FSHR;
-    case Intrinsic::ceil:
-      return TargetOpcode::G_FCEIL;
-    case Intrinsic::cos:
-      return TargetOpcode::G_FCOS;
-    case Intrinsic::ctpop:
-      return TargetOpcode::G_CTPOP;
-    case Intrinsic::exp:
-      return TargetOpcode::G_FEXP;
-    case Intrinsic::exp2:
-      return TargetOpcode::G_FEXP2;
-    case Intrinsic::fabs:
-      return TargetOpcode::G_FABS;
-    case Intrinsic::copysign:
-      return TargetOpcode::G_FCOPYSIGN;
-    case Intrinsic::minnum:
-      return TargetOpcode::G_FMINNUM;
-    case Intrinsic::maxnum:
-      return TargetOpcode::G_FMAXNUM;
-    case Intrinsic::minimum:
-      return TargetOpcode::G_FMINIMUM;
-    case Intrinsic::maximum:
-      return TargetOpcode::G_FMAXIMUM;
-    case Intrinsic::canonicalize:
-      return TargetOpcode::G_FCANONICALIZE;
-    case Intrinsic::floor:
-      return TargetOpcode::G_FFLOOR;
-    case Intrinsic::fma:
-      return TargetOpcode::G_FMA;
-    case Intrinsic::log:
-      return TargetOpcode::G_FLOG;
-    case Intrinsic::log2:
-      return TargetOpcode::G_FLOG2;
-    case Intrinsic::log10:
-      return TargetOpcode::G_FLOG10;
-    case Intrinsic::ldexp:
-      return TargetOpcode::G_FLDEXP;
-    case Intrinsic::nearbyint:
-      return TargetOpcode::G_FNEARBYINT;
-    case Intrinsic::pow:
-      return TargetOpcode::G_FPOW;
-    case Intrinsic::powi:
-      return TargetOpcode::G_FPOWI;
-    case Intrinsic::rint:
-      return TargetOpcode::G_FRINT;
-    case Intrinsic::round:
-      return TargetOpcode::G_INTRINSIC_ROUND;
-    case Intrinsic::roundeven:
-      return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
-    case Intrinsic::sin:
-      return TargetOpcode::G_FSIN;
-    case Intrinsic::sqrt:
-      return TargetOpcode::G_FSQRT;
-    case Intrinsic::trunc:
-      return TargetOpcode::G_INTRINSIC_TRUNC;
-    case Intrinsic::readcyclecounter:
-      return TargetOpcode::G_READCYCLECOUNTER;
-    case Intrinsic::ptrmask:
-      return TargetOpcode::G_PTRMASK;
-    case Intrinsic::lrint:
-      return TargetOpcode::G_INTRINSIC_LRINT;
-    // FADD/FMUL require checking the FMF, so are handled elsewhere.
-    case Intrinsic::vector_reduce_fmin:
-      return TargetOpcode::G_VECREDUCE_FMIN;
-    case Intrinsic::vector_reduce_fmax:
-      return TargetOpcode::G_VECREDUCE_FMAX;
-    case Intrinsic::vector_reduce_add:
-      return TargetOpcode::G_VECREDUCE_ADD;
-    case Intrinsic::vector_reduce_mul:
-      return TargetOpcode::G_VECREDUCE_MUL;
-    case Intrinsic::vector_reduce_and:
-      return TargetOpcode::G_VECREDUCE_AND;
-    case Intrinsic::vector_reduce_or:
-      return TargetOpcode::G_VECREDUCE_OR;
-    case Intrinsic::vector_reduce_xor:
-      return TargetOpcode::G_VECREDUCE_XOR;
-    case Intrinsic::vector_reduce_smax:
-      return TargetOpcode::G_VECREDUCE_SMAX;
-    case Intrinsic::vector_reduce_smin:
-      return TargetOpcode::G_VECREDUCE_SMIN;
-    case Intrinsic::vector_reduce_umax:
-      return TargetOpcode::G_VECREDUCE_UMAX;
-    case Intrinsic::vector_reduce_umin:
-      return TargetOpcode::G_VECREDUCE_UMIN;
-    case Intrinsic::lround:
-      return TargetOpcode::G_LROUND;
-    case Intrinsic::llround:
-      return TargetOpcode::G_LLROUND;
+  default:
+    break;
+  case Intrinsic::bswap:
+    return TargetOpcode::G_BSWAP;
+  case Intrinsic::bitreverse:
+    return TargetOpcode::G_BITREVERSE;
+  case Intrinsic::fshl:
+    return TargetOpcode::G_FSHL;
+  case Intrinsic::fshr:
+    return TargetOpcode::G_FSHR;
+  case Intrinsic::ceil:
+    return TargetOpcode::G_FCEIL;
+  case Intrinsic::cos:
+    return TargetOpcode::G_FCOS;
+  case Intrinsic::ctpop:
+    return TargetOpcode::G_CTPOP;
+  case Intrinsic::exp:
+    return TargetOpcode::G_FEXP;
+  case Intrinsic::exp2:
+    return TargetOpcode::G_FEXP2;
+  case Intrinsic::fabs:
+    return TargetOpcode::G_FABS;
+  case Intrinsic::copysign:
+    return TargetOpcode::G_FCOPYSIGN;
+  case Intrinsic::minnum:
+    return TargetOpcode::G_FMINNUM;
+  case Intrinsic::maxnum:
+    return TargetOpcode::G_FMAXNUM;
+  case Intrinsic::minimum:
+    return TargetOpcode::G_FMINIMUM;
+  case Intrinsic::maximum:
+    return TargetOpcode::G_FMAXIMUM;
+  case Intrinsic::canonicalize:
+    return TargetOpcode::G_FCANONICALIZE;
+  case Intrinsic::floor:
+    return TargetOpcode::G_FFLOOR;
+  case Intrinsic::fma:
+    return TargetOpcode::G_FMA;
+  case Intrinsic::log:
+    return TargetOpcode::G_FLOG;
+  case Intrinsic::log2:
+    return TargetOpcode::G_FLOG2;
+  case Intrinsic::log10:
+    return TargetOpcode::G_FLOG10;
+  case Intrinsic::ldexp:
+    return TargetOpcode::G_FLDEXP;
+  case Intrinsic::nearbyint:
+    return TargetOpcode::G_FNEARBYINT;
+  case Intrinsic::pow:
+    return TargetOpcode::G_FPOW;
+  case Intrinsic::powi:
+    return TargetOpcode::G_FPOWI;
+  case Intrinsic::rint:
+    return TargetOpcode::G_FRINT;
+  case Intrinsic::round:
+    return TargetOpcode::G_INTRINSIC_ROUND;
+  case Intrinsic::roundeven:
+    return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
+  case Intrinsic::sin:
+    return TargetOpcode::G_FSIN;
+  case Intrinsic::sqrt:
+    return TargetOpcode::G_FSQRT;
+  case Intrinsic::trunc:
+    return TargetOpcode::G_INTRINSIC_TRUNC;
+  case Intrinsic::readcyclecounter:
+    return TargetOpcode::G_READCYCLECOUNTER;
+  case Intrinsic::ptrmask:
+    return TargetOpcode::G_PTRMASK;
+  case Intrinsic::lrint:
+    return TargetOpcode::G_INTRINSIC_LRINT;
+  // FADD/FMUL require checking the FMF, so are handled elsewhere.
+  case Intrinsic::vector_reduce_fmin:
+    return TargetOpcode::G_VECREDUCE_FMIN;
+  case Intrinsic::vector_reduce_fmax:
+    return TargetOpcode::G_VECREDUCE_FMAX;
+  case Intrinsic::vector_reduce_add:
+    return TargetOpcode::G_VECREDUCE_ADD;
+  case Intrinsic::vector_reduce_mul:
+    return TargetOpcode::G_VECREDUCE_MUL;
+  case Intrinsic::vector_reduce_and:
+    return TargetOpcode::G_VECREDUCE_AND;
+  case Intrinsic::vector_reduce_or:
+    return TargetOpcode::G_VECREDUCE_OR;
+  case Intrinsic::vector_reduce_xor:
+    return TargetOpcode::G_VECREDUCE_XOR;
+  case Intrinsic::vector_reduce_smax:
+    return TargetOpcode::G_VECREDUCE_SMAX;
+  case Intrinsic::vector_reduce_smin:
+    return TargetOpcode::G_VECREDUCE_SMIN;
+  case Intrinsic::vector_reduce_umax:
+    return TargetOpcode::G_VECREDUCE_UMAX;
+  case Intrinsic::vector_reduce_umin:
+    return TargetOpcode::G_VECREDUCE_UMIN;
+  case Intrinsic::lround:
+    return TargetOpcode::G_LROUND;
+  case Intrinsic::llround:
+    return TargetOpcode::G_LLROUND;
   }
   return Intrinsic::not_intrinsic;
 }
@@ -1833,6 +1825,39 @@
   if (Op == Intrinsic::not_intrinsic)
     return false;
 
+  if (MF->getTarget().getTargetTriple().isAArch64()) {
+    if (auto vector = dyn_cast<VectorType>(CI.getType())) {
+      if (!vector->getElementCount().isScalable() &&
+          vector->getPrimitiveSizeInBits().getFixedValue() > 128) {
+        const Function &F = MF->getFunction();
+
+        OptimizationRemarkMissed R("gisel-irtranslator", "GISelInconvenience",
+                                   F.getSubprogram(), &F.getEntryBlock());
+
+        R << "size of return type (vector) exceeds 128 bit on AArch64: op="
+          << std::to_string(Op)
+          << " size="
+          << std::to_string(vector->getPrimitiveSizeInBits().getFixedValue())
+          << (" (in function: " + MF->getName() + ")").str();
+        ORE->emit(R);
+      }
+    } else if (auto integ = dyn_cast<IntegerType>(CI.getType())) {
+      if (integ->getBitWidth() > 128) {
+        const Function &F = MF->getFunction();
+
+        OptimizationRemarkMissed R("gisel-irtranslator", "GISelInconvenience",
+                                   F.getSubprogram(), &F.getEntryBlock());
+
+        R << "size of return type (integer) exceeds 128 bit on AArch64: op="
+          << std::to_string(Op)
+          << " size=" << std::to_string(integ->getBitWidth())
+          << (" (in function: " + MF->getName() + ")").str();
+        ORE->emit(R);
+      }
+    }
+  }
+
   // Yes. Let's translate it.
   SmallVector<llvm::SrcOp, 4> VRegs;
   for (const auto &Arg : CI.args())
@@ -1868,7 +1893,7 @@
 }
 
 bool IRTranslator::translateConstrainedFPIntrinsic(
-  const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
+    const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
   fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
 
   unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
@@ -2026,9 +2051,9 @@
   const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
   assert(DI.getLabel() && "Missing label");
 
-  assert(DI.getLabel()->isValidLocationForIntrinsic(
-             MIRBuilder.getDebugLoc()) &&
-         "Expected inlined-at fields to agree");
+  assert(
+      DI.getLabel()->isValidLocationForIntrinsic(MIRBuilder.getDebugLoc()) &&
+      "Expected inlined-at fields to agree");
 
   MIRBuilder.buildDbgLabel(DI.getLabel());
   return true;
@@ -2125,21 +2150,29 @@
     // TODO: Preserve "int min is poison" arg in GMIR?
     return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
   case Intrinsic::smul_fix:
-    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
+    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI,
+                                        MIRBuilder);
   case Intrinsic::umul_fix:
-    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
+    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI,
+                                        MIRBuilder);
   case Intrinsic::smul_fix_sat:
-    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
+    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI,
+                                        MIRBuilder);
   case Intrinsic::umul_fix_sat:
-    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
+    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI,
+                                        MIRBuilder);
   case Intrinsic::sdiv_fix:
-    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
+    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI,
+                                        MIRBuilder);
   case Intrinsic::udiv_fix:
-    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
+    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI,
+                                        MIRBuilder);
   case Intrinsic::sdiv_fix_sat:
-    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
+    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI,
+                                        MIRBuilder);
   case Intrinsic::udiv_fix_sat:
-    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
+    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI,
+                                        MIRBuilder);
   case Intrinsic::fmuladd: {
     const TargetMachine &TM = MF->getTarget();
     const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
@@ -2260,11 +2293,11 @@
   case Intrinsic::ctlz: {
     ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
     bool isTrailing = ID == Intrinsic::cttz;
-    unsigned Opcode = isTrailing
-                          ? Cst->isZero() ? TargetOpcode::G_CTTZ
-                                          : TargetOpcode::G_CTTZ_ZERO_UNDEF
-                          : Cst->isZero() ? TargetOpcode::G_CTLZ
-                                          : TargetOpcode::G_CTLZ_ZERO_UNDEF;
+    unsigned Opcode = isTrailing ? Cst->isZero()
+                                       ? TargetOpcode::G_CTTZ
+                                       : TargetOpcode::G_CTTZ_ZERO_UNDEF
+                      : Cst->isZero() ? TargetOpcode::G_CTLZ
+                                      : TargetOpcode::G_CTLZ_ZERO_UNDEF;
     MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
                           {getOrCreateVReg(*CI.getArgOperand(0))});
     return true;
@@ -2304,8 +2337,8 @@
   case Intrinsic::write_register: {
     Value *Arg = CI.getArgOperand(0);
     MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER)
-      .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
-      .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
+        .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
+        .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
     return true;
   }
   case Intrinsic::localescape: {
@@ -2415,12 +2448,11 @@
     return true;
   }
 
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
     return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
                                            MIRBuilder);
-
   }
   return false;
 }
@@ -2587,16 +2619,15 @@
       MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
     else if (Info.fallbackAddressSpace)
       MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
-    MIB.addMemOperand(
-        MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
+    MIB.addMemOperand(MF->getMachineMemOperand(MPI, Info.flags, MemTy,
+                                               Alignment, CI.getAAMetadata()));
   }
 
   return true;
 }
 
 bool IRTranslator::findUnwindDestinations(
-    const BasicBlock *EHPadBB,
-    BranchProbability Prob,
+    const BasicBlock *EHPadBB, BranchProbability Prob,
     SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
         &UnwindDests) {
   EHPersonality Personality = classifyEHPersonality(
@@ -2761,8 +2792,7 @@
 
   // Add a label to mark the beginning of the landing pad. Deletion of the
   // landing pad can thus be detected via the MachineModuleInfo.
-  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
-    .addSym(MF->addLandingPad(&MBB));
+  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(MF->addLandingPad(&MBB));
 
   // If the unwinder does not preserve all registers, ensure that the
   // function marks the clobbered registers as used.
@@ -2867,8 +2897,9 @@
   return true;
 }
 
-bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) {
-  if (!MF->getTarget().Options.TrapUnreachable)
+bool IRTranslator::translateUnreachable(const User &U,
+                                        MachineIRBuilder &MIRBuilder) {
+  if (!MF->getTarget().Options.TrapUnreachable)
     return true;
 
   auto &UI = cast<UnreachableInst>(U);
@@ -2877,7 +2908,7 @@
   const BasicBlock &BB = *UI.getParent();
   if (&UI != &BB.front()) {
     BasicBlock::const_iterator PredI =
-      std::prev(BasicBlock::const_iterator(UI));
+        std::prev(BasicBlock::const_iterator(UI));
     if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
       if (Call->doesNotReturn())
         return true;
@@ -3061,8 +3092,7 @@
   return true;
 }
 
-bool IRTranslator::translateFence(const User &U,
-                                  MachineIRBuilder &MIRBuilder) {
+bool IRTranslator::translateFence(const User &U, MachineIRBuilder &MIRBuilder) {
   const FenceInst &Fence = cast<FenceInst>(U);
   MIRBuilder.buildFence(static_cast<AtomicOrdering>(Fence.getOrdering()),
                         Fence.getSyncScopeID());
@@ -3175,7 +3205,7 @@
     }
     EntryBuilder->buildBuildVector(Reg, Ops);
   } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
-    switch(CE->getOpcode()) {
+    switch (CE->getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
   case Instruction::OPCODE:                                                    \
     return translate##OPCODE(*CE, *EntryBuilder.get());
@@ -3520,8 +3550,6 @@
   SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
   SL->init(TLI, TM, *DL);
 
-
-
   assert(PendingPHIs.empty() && "stale PHIs");
 
   // Targets which want to use big endian can enable it using
@@ -3550,7 +3578,7 @@
   bool HasMustTailInVarArgFn = false;
 
   // Create all blocks, in IR order, to preserve the layout.
-  for (const BasicBlock &BB: F) {
+  for (const BasicBlock &BB : F) {
     auto *&MBB = BBToMBB[&BB];
 
     MBB = MF->CreateMachineBasicBlock(&BB);
@@ -3578,7 +3606,7 @@
 
   // Lower the actual args into this basic block.
   SmallVector<ArrayRef<Register>, 8> VRegArgs;
-  for (const Argument &Arg: F.args()) {
+  for (const Argument &Arg : F.args()) {
     if (DL->getTypeStoreSize(Arg.getType()).isZero())
       continue; // Don't handle zero sized types.
     ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1003,6 +1003,7 @@
     return Legalized;
  }
 
+  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
   case TargetOpcode::G_FREEZE: {
     if (TypeIdx != 0)
       return UnableToLegalize;
@@ -2186,6 +2187,7 @@
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
   case TargetOpcode::G_FREEZE:
     Observer.changingInstr(MI);
     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -388,8 +388,9 @@
                  {s32, p0, s8, 8},  {s32, p0, s16, 8}, {s32, p0, s32, 8},
                  {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
                  {p0, p0, s64, 8},  {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
-                 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
-                 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
+                 {v8s8, p0, s64, 8}, {v4s8, p0, s32, 8}, {v4s16, p0, s64, 8},
+                 {v8s16, p0, s128, 8}, {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8},
+                 {v2s64, p0, s128, 8}})
       .clampScalar(0, s8, s64)
       .lowerIf([=](const LegalityQuery &Query) {
         return Query.Types[0].isScalar() &&
@@ -512,7 +513,10 @@
   };
   getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
       .legalIf(ExtLegalFunc)
-      .clampScalar(0, s64, s64); // Just for s128, others are handled above.
+      .clampScalar(0, s64, s64) // Just for s128, others are handled above.
+      .clampMaxNumElements(0, s32, 4)
+      .clampMaxNumElements(0, s64, 2);
+
 
   getActionDefinitionsBuilder(G_TRUNC)
       .minScalarOrEltIf(
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant-fold-barrier.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant-fold-barrier.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant-fold-barrier.mir
@@ -0,0 +1,15 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -o - -global-isel-abort=1 | FileCheck %s
+---
+name: odd
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: odd
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C]]
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s40) = G_TRUNC [[CONSTANT_FOLD_BARRIER]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s40) = COPY [[TRUNC]](s40)
+    %0:_(s40) = G_CONSTANT i40 1
+    %1:_(s40) = G_CONSTANT_FOLD_BARRIER %0(s40)
+    %2:_(s40) = COPY %1(s40)
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir
@@ -1,6 +1,34 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s
 ---
+name: illegal_zext_4xs64
+body: |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: illegal_zext_4xs64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:_(<4 x s32>) = IMPLICIT_DEF
+    %2:_(<4 x s64>) = G_ZEXT %0(<4 x s32>)
+    RET_ReallyLR
+
+...
+---
+name: illegal_zext_8xs32
+body: |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: illegal_zext_8xs32
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:_(<8 x s16>) = IMPLICIT_DEF
+    %2:_(<8 x s32>) = G_ZEXT %0(<8 x s16>)
+    RET_ReallyLR
+
+...
+---
 name: narrow_sext_s128
 tracksRegLiveness: true
 body: |
@@ -9,13 +37,14 @@
 
     ; CHECK-LABEL: name: narrow_sext_s128
     ; CHECK: liveins: $x0, $x1
-    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64)
-    ; CHECK: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store (s128))
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64)
+    ; CHECK-NEXT: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store (s128))
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(s64) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s128) = G_SEXT %0(s64)
@@ -32,12 +61,13 @@
 
     ; CHECK-LABEL: name: narrow_zext_s128
     ; CHECK: liveins: $x0, $x1
-    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64)
-    ; CHECK: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store (s128))
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64)
+    ; CHECK-NEXT: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store (s128))
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(s64) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s128) = G_ZEXT %0(s64)
@@ -54,14 +84,15 @@
 
     ; CHECK-LABEL: name: narrow_zext_s128_from_s32
     ; CHECK: liveins: $w0, $x1
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64)
-    ; CHECK: G_STORE [[MV1]](s128), [[COPY1]](p0) :: (store (s128))
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[MV1]](s128), [[COPY1]](p0) :: (store (s128))
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(s32) = COPY $w0
     %1:_(p0) = COPY $x1
     %2:_(s128) = G_ZEXT %0(s32)
@@ -78,13 +109,14 @@
 
     ; CHECK-LABEL: name: narrow_zext_s192
     ; CHECK: liveins: $x0, $x1
-    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64))
-    ; CHECK: G_STORE [[C]](s64), [[COPY1]](p0) :: (store (s64))
-    ; CHECK: G_STORE [[C]](s64), [[COPY1]](p0) :: (store (s64))
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64))
+    ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY1]](p0) :: (store (s64))
+    ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY1]](p0) :: (store (s64))
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(s64) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s192) = G_ZEXT %0(s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store.mir
@@ -0,0 +1,16 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -o - -global-isel-abort=1 | FileCheck %s
+---
+name: justAnotherStore
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: justAnotherStore
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s8>) = COPY $w0
+    ; CHECK-NEXT: G_STORE [[COPY1]](<4 x s8>), [[COPY]](p0) :: (store (<4 x s8>))
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
+    %0:_(p0) = COPY $x0
+    %1:_(<4 x s8>) = COPY $w0
+    G_STORE %1:_(<4 x s8>), %0:_(p0) :: (store (<4 x s8>))
+    %2:_(p0) = COPY %0(p0)
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -74,21 +74,35 @@
 }
 
 define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
-; CHECK-LABEL: afunc3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll2.4s v1, v0, #0
-; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECKDAG-LABEL: afunc3:
+; CHECKDAG:       // %bb.0:
+; CHECKDAG-NEXT:    ushll2.4s v1, v0, #0
+; CHECKDAG-NEXT:    ushll.4s v0, v0, #0
+; CHECKDAG-NEXT:    ret
+;
+; FALLBACK-LABEL: afunc3:
+; FALLBACK:       // %bb.0:
+; FALLBACK-NEXT:    mov d1, v0[1]
+; FALLBACK-NEXT:    ushll.4s v0, v0, #0
+; FALLBACK-NEXT:    ushll.4s v1, v1, #0
+; FALLBACK-NEXT:    ret
   %r = zext <8 x i16> %v0 to <8 x i32>
   ret <8 x i32> %r
 }
 
 define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
-; CHECK-LABEL: afunc4:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll2.4s v1, v0, #0
-; CHECK-NEXT:    sshll.4s v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECKDAG-LABEL: afunc4:
+; CHECKDAG:       // %bb.0:
+; CHECKDAG-NEXT:    sshll2.4s v1, v0, #0
+; CHECKDAG-NEXT:    sshll.4s v0, v0, #0
+; CHECKDAG-NEXT:    ret
+;
+; FALLBACK-LABEL: afunc4:
+; FALLBACK:       // %bb.0:
+; FALLBACK-NEXT:    mov d1, v0[1]
+; FALLBACK-NEXT:    sshll.4s v0, v0, #0
+; FALLBACK-NEXT:    sshll.4s v1, v1, #0
+; FALLBACK-NEXT:    ret
   %r = sext <8 x i16> %v0 to <8 x i32>
   ret <8 x i32> %r
 }
@@ -120,21 +134,35 @@
 ;-----
 
 define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
-; CHECK-LABEL: zfunc1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll2.2d v1, v0, #0
-; CHECK-NEXT:    ushll.2d v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECKDAG-LABEL: zfunc1:
+; CHECKDAG:       // %bb.0:
+; CHECKDAG-NEXT:    ushll2.2d v1, v0, #0
+; CHECKDAG-NEXT:    ushll.2d v0, v0, #0
+; CHECKDAG-NEXT:    ret
+;
+; FALLBACK-LABEL: zfunc1:
+; FALLBACK:       // %bb.0:
+; FALLBACK-NEXT:    mov d1, v0[1]
+; FALLBACK-NEXT:    ushll.2d v0, v0, #0
+; FALLBACK-NEXT:    ushll.2d v1, v1, #0
+; FALLBACK-NEXT:    ret
   %r = zext <4 x i32> %v0 to <4 x i64>
   ret <4 x i64> %r
 }
 
 define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
-; CHECK-LABEL: zfunc2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll2.2d v1, v0, #0
-; CHECK-NEXT:    sshll.2d v0, v0, #0
-; CHECK-NEXT:    ret
+; CHECKDAG-LABEL: zfunc2:
+; CHECKDAG:       // %bb.0:
+; CHECKDAG-NEXT:    sshll2.2d v1, v0, #0
+; CHECKDAG-NEXT:    sshll.2d v0, v0, #0
+; CHECKDAG-NEXT:    ret
+;
+; FALLBACK-LABEL: zfunc2:
+; FALLBACK:       // %bb.0:
+; FALLBACK-NEXT:    mov d1, v0[1]
+; FALLBACK-NEXT:    sshll.2d v0, v0, #0
+; FALLBACK-NEXT:    sshll.2d v1, v1, #0
+; FALLBACK-NEXT:    ret
   %r = sext <4 x i32> %v0 to <4 x i64>
   ret <4 x i64> %r
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -49,12 +49,12 @@
 
 ; FALLBACK-NOT: remark:{{.*}} sabdl2_8h
 define <8 x i16> @sabdl2_8h(ptr %A, ptr %B) nounwind {
-; DAG-LABEL: sabdl2_8h:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr d0, [x0, #8]
-; DAG-NEXT:    ldr d1, [x1, #8]
-; DAG-NEXT:    sabdl.8h v0, v0, v1
-; DAG-NEXT:    ret
+; DAG-LABEL: sabdl2_8h:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr d0, [x0, #8]
+; DAG-NEXT:    ldr d1, [x1, #8]
+; DAG-NEXT:    sabdl.8h v0, v0, v1
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: sabdl2_8h:
 ; GISEL:       // %bb.0:
@@ -62,7 +62,7 @@
 ; GISEL-NEXT:    ldr q1, [x1]
 ; GISEL-NEXT:    ext.16b v0, v0, v0, #8
 ; GISEL-NEXT:    ext.16b v1, v1, v0, #8
-; GISEL-NEXT:    sabdl.8h v0, v0, v1
+; GISEL-NEXT:    sabdl.8h v0, v0, v1
 ; GISEL-NEXT:    ret
   %load1 = load <16 x i8>, ptr %A
   %load2 = load <16 x i8>, ptr %B
@@ -75,12 +75,12 @@
 
 ; FALLBACK-NOT: remark:{{.*}} sabdl2_4s
 define <4 x i32> @sabdl2_4s(ptr %A, ptr %B) nounwind {
-; DAG-LABEL: sabdl2_4s:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr d0, [x0, #8]
-; DAG-NEXT:    ldr d1, [x1, #8]
-; DAG-NEXT:    sabdl.4s v0, v0, v1
-; DAG-NEXT:    ret
+; DAG-LABEL: sabdl2_4s:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr d0, [x0, #8]
+; DAG-NEXT:    ldr d1, [x1, #8]
+; DAG-NEXT:    sabdl.4s v0, v0, v1
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: sabdl2_4s:
 ; GISEL:       // %bb.0:
@@ -101,12 +101,12 @@
 
 ; FALLBACK-NOT: remark:{{.*}} sabdl2_2d
 define <2 x i64> @sabdl2_2d(ptr %A, ptr %B) nounwind {
-; DAG-LABEL: sabdl2_2d:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr d0, [x0, #8]
-; DAG-NEXT:    ldr d1, [x1, #8]
-; DAG-NEXT:    sabdl.2d v0, v0, v1
-; DAG-NEXT:    ret
+; DAG-LABEL: sabdl2_2d:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr d0, [x0, #8]
+; DAG-NEXT:    ldr d1, [x1, #8]
+; DAG-NEXT:    sabdl.2d v0, v0, v1
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: sabdl2_2d:
 ; GISEL:       // %bb.0:
@@ -172,12 +172,12 @@
 
 ; FALLBACK-NOT: remark:{{.*}} uabdl2_8h
 define <8 x i16> @uabdl2_8h(ptr %A, ptr %B) nounwind {
-; DAG-LABEL: uabdl2_8h:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr d0, [x0, #8]
-; DAG-NEXT:    ldr d1, [x1, #8]
-; DAG-NEXT:    uabdl.8h v0, v0, v1
-; DAG-NEXT:    ret
+; DAG-LABEL: uabdl2_8h:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr d0, [x0, #8]
+; DAG-NEXT:    ldr d1, [x1, #8]
+; DAG-NEXT:    uabdl.8h v0, v0, v1
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: uabdl2_8h:
 ; GISEL:       // %bb.0:
@@ -185,7 +185,7 @@
 ; GISEL-NEXT:    ldr q1, [x1]
 ; GISEL-NEXT:    ext.16b v0, v0, v0, #8
 ; GISEL-NEXT:    ext.16b v1, v1, v0, #8
-; GISEL-NEXT:    uabdl.8h v0, v0, v1
+; GISEL-NEXT:    uabdl.8h v0, v0, v1
 ; GISEL-NEXT:    ret
   %load1 = load <16 x i8>, ptr %A
   %load2 = load <16 x i8>, ptr %B
@@ -199,12 +199,12 @@
 
 ; FALLBACK-NOT: remark:{{.*}} uabdl2_4s
 define <4 x i32> @uabdl2_4s(ptr %A, ptr %B) nounwind {
-; DAG-LABEL: uabdl2_4s:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr d0, [x0, #8]
-; DAG-NEXT:    ldr d1, [x1, #8]
-; DAG-NEXT:    uabdl.4s v0, v0, v1
-; DAG-NEXT:    ret
+; DAG-LABEL: uabdl2_4s:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr d0, [x0, #8]
+; DAG-NEXT:    ldr d1, [x1, #8]
+; DAG-NEXT:    uabdl.4s v0, v0, v1
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: uabdl2_4s:
 ; GISEL:       // %bb.0:
@@ -212,7 +212,7 @@
 ; GISEL-NEXT:    ldr q1, [x1]
 ; GISEL-NEXT:    ext.16b v0, v0, v0, #8
 ; GISEL-NEXT:    ext.16b v1, v1, v0, #8
-; GISEL-NEXT:    uabdl.4s v0, v0, v1
+; GISEL-NEXT:    uabdl.4s v0, v0, v1
 ; GISEL-NEXT:    ret
   %load1 = load <8 x i16>, ptr %A
   %load2 = load <8 x i16>, ptr %B
@@ -225,12 +225,12 @@
 
 ; FALLBACK-NOT: remark:{{.*}} uabdl2_2d
 define <2 x i64> @uabdl2_2d(ptr %A, ptr %B) nounwind {
-; DAG-LABEL: uabdl2_2d:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr d0, [x0, #8]
-; DAG-NEXT:    ldr d1, [x1, #8]
-; DAG-NEXT:    uabdl.2d v0, v0, v1
-; DAG-NEXT:    ret
+; DAG-LABEL: uabdl2_2d:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr d0, [x0, #8]
+; DAG-NEXT:    ldr d1, [x1, #8]
+; DAG-NEXT:    uabdl.2d v0, v0, v1
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: uabdl2_2d:
 ; GISEL:       // %bb.0:
@@ -238,7 +238,7 @@
 ; GISEL-NEXT:    ldr q1, [x1]
 ; GISEL-NEXT:    ext.16b v0, v0, v0, #8
 ; GISEL-NEXT:    ext.16b v1, v1, v0, #8
-; GISEL-NEXT:    uabdl.2d v0, v0, v1
+; GISEL-NEXT:    uabdl.2d v0, v0, v1
 ; GISEL-NEXT:    ret
   %load1 = load <4 x i32>, ptr %A
   %load2 = load <4 x i32>, ptr %B
@@ -314,14 +314,38 @@
 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 
 define i32 @uabd8h_rdx(ptr %a, ptr %b) {
-; CHECK-LABEL: uabd8h_rdx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    ldr q1, [x1]
-; CHECK-NEXT:    uabd.8h v0, v0, v1
-; CHECK-NEXT:    uaddlv.8h s0, v0
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; DAG-LABEL: uabd8h_rdx:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr q0, [x0]
+; DAG-NEXT:    ldr q1, [x1]
+; DAG-NEXT:    uabd.8h v0, v0, v1
+; DAG-NEXT:    uaddlv.8h s0, v0
+; DAG-NEXT:    fmov w0, s0
+; DAG-NEXT:    ret
+;
+; GISEL-LABEL: uabd8h_rdx:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr q1, [x0]
+; GISEL-NEXT:    ldr q2, [x1]
+; GISEL-NEXT:    movi.2d v0, #0000000000000000
+; GISEL-NEXT:    mov d3, v1[1]
+; GISEL-NEXT:    mov d4, v2[1]
+; GISEL-NEXT:    usubl.4s v1, v1, v2
+; GISEL-NEXT:    usubl.4s v2, v3, v4
+; GISEL-NEXT:    cmgt.4s v3, v0, v1
+; GISEL-NEXT:    neg.4s v4, v1
+; GISEL-NEXT:    cmgt.4s v0, v0, v2
+; GISEL-NEXT:    shl.4s v3, v3, #31
+; GISEL-NEXT:    shl.4s v0, v0, #31
+; GISEL-NEXT:    neg.4s v5, v2
+; GISEL-NEXT:    sshr.4s v3, v3, #31
+; GISEL-NEXT:    sshr.4s v0, v0, #31
+; GISEL-NEXT:    bit.16b v1, v4, v3
+; GISEL-NEXT:    bsl.16b v0, v5, v2
+; GISEL-NEXT:    add.4s v0, v1, v0
+; GISEL-NEXT:    addv.4s s0, v0
+; GISEL-NEXT:    fmov w0, s0
+; GISEL-NEXT:    ret
   %aload = load <8 x i16>, ptr %a, align 1
   %bload = load <8 x i16>, ptr %b, align 1
   %aext = zext <8 x i16> %aload to <8 x i32>
@@ -335,12 +359,34 @@
 }
 
 define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: sabd8h_rdx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd.8h v0, v0, v1
-; CHECK-NEXT:    uaddlv.8h s0, v0
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; DAG-LABEL: sabd8h_rdx:
+; DAG:       // %bb.0:
+; DAG-NEXT:    sabd.8h v0, v0, v1
+; DAG-NEXT:    uaddlv.8h s0, v0
+; DAG-NEXT:    fmov w0, s0
+; DAG-NEXT:    ret
+;
+; GISEL-LABEL: sabd8h_rdx:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov d3, v0[1]
+; GISEL-NEXT:    mov d4, v1[1]
+; GISEL-NEXT:    movi.2d v2, #0000000000000000
+; GISEL-NEXT:    ssubl.4s v0, v0, v1
+; GISEL-NEXT:    ssubl.4s v1, v3, v4
+; GISEL-NEXT:    cmgt.4s v3, v2, v0
+; GISEL-NEXT:    neg.4s v4, v0
+; GISEL-NEXT:    cmgt.4s v2, v2, v1
+; GISEL-NEXT:    shl.4s v3, v3, #31
+; GISEL-NEXT:    shl.4s v2, v2, #31
+; GISEL-NEXT:    neg.4s v5, v1
+; GISEL-NEXT:    sshr.4s v3, v3, #31
+; GISEL-NEXT:    sshr.4s v2, v2, #31
+; GISEL-NEXT:    bit.16b v0, v4, v3
+; GISEL-NEXT:    bit.16b v1, v5, v2
+; GISEL-NEXT:    add.4s v0, v0, v1
+; GISEL-NEXT:    addv.4s s0, v0
+; GISEL-NEXT:    fmov w0, s0
+; GISEL-NEXT:    ret
   %aext = sext <8 x i16> %a to <8 x i32>
   %bext = sext <8 x i16> %b to <8 x i32>
   %abdiff = sub nsw <8 x i32> %aext, %bext
@@ -1033,13 +1079,13 @@
 
 ; FALLBACK-NOT: remark:{{.*}} sabal2_8h
 define <8 x i16> @sabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
-; DAG-LABEL: sabal2_8h:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr q0, [x2]
-; DAG-NEXT:    ldr d1, [x0, #8]
-; DAG-NEXT:    ldr d2, [x1, #8]
-; DAG-NEXT:    sabal.8h v0, v1, v2
-; DAG-NEXT:    ret
+; DAG-LABEL: sabal2_8h:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr q0, [x2]
+; DAG-NEXT:    ldr d1, [x0, #8]
+; DAG-NEXT:    ldr d2, [x1, #8]
+; DAG-NEXT:    sabal.8h v0, v1, v2
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: sabal2_8h:
 ; GISEL:       // %bb.0:
@@ -1063,13 +1109,13 @@
 
 ; FALLBACK-NOT: remark:{{.*}} sabal2_4s
 define <4 x i32> @sabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
-; DAG-LABEL: sabal2_4s:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr q0, [x2]
-; DAG-NEXT:    ldr d1, [x0, #8]
-; DAG-NEXT:    ldr d2, [x1, #8]
-; DAG-NEXT:    sabal.4s v0, v1, v2
-; DAG-NEXT:    ret
+; DAG-LABEL: sabal2_4s:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr q0, [x2]
+; DAG-NEXT:    ldr d1, [x0, #8]
+; DAG-NEXT:    ldr d2, [x1, #8]
+; DAG-NEXT:    sabal.4s v0, v1, v2
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: sabal2_4s:
 ; GISEL:       // %bb.0:
@@ -1093,13 +1139,13 @@
 
 ; FALLBACK-NOT: remark:{{.*}} sabal2_2d
 define <2 x i64> @sabal2_2d(ptr %A, ptr %B, ptr %C) nounwind {
-; DAG-LABEL: sabal2_2d:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr q0, [x2]
-; DAG-NEXT:    ldr d1, [x0, #8]
-; DAG-NEXT:    ldr d2, [x1, #8]
-; DAG-NEXT:    sabal.2d v0, v1, v2
-; DAG-NEXT:    ret
+; DAG-LABEL: sabal2_2d:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr q0, [x2]
+; DAG-NEXT:    ldr d1, [x0, #8]
+; DAG-NEXT:    ldr d2, [x1, #8]
+; DAG-NEXT:    sabal.2d v0, v1, v2
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: sabal2_2d:
 ; GISEL:       // %bb.0:
@@ -1201,13 +1247,13 @@
 
 ; FALLBACK-NOT: remark:{{.*}} uabal2_8h
 define <8 x i16> @uabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
-; DAG-LABEL: uabal2_8h:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr q0, [x2]
-; DAG-NEXT:    ldr d1, [x0, #8]
-; DAG-NEXT:    ldr d2, [x1, #8]
-; DAG-NEXT:    uabal.8h v0, v1, v2
-; DAG-NEXT:    ret
+; DAG-LABEL: uabal2_8h:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr q0, [x2]
+; DAG-NEXT:    ldr d1, [x0, #8]
+; DAG-NEXT:    ldr d2, [x1, #8]
+; DAG-NEXT:    uabal.8h v0, v1, v2
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: uabal2_8h:
 ; GISEL:       // %bb.0:
@@ -1231,13 +1277,13 @@
 
 ; FALLBACK-NOT: remark:{{.*}} uabal2_4s
 define <4 x i32> @uabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
-; DAG-LABEL: uabal2_4s:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr q0, [x2]
-; DAG-NEXT:    ldr d1, [x0, #8]
-; DAG-NEXT:    ldr d2, [x1, #8]
-; DAG-NEXT:    uabal.4s v0, v1, v2
-; DAG-NEXT:    ret
+; DAG-LABEL: uabal2_4s:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr q0, [x2]
+; DAG-NEXT:    ldr d1, [x0, #8]
+; DAG-NEXT:    ldr d2, [x1, #8]
+; DAG-NEXT:    uabal.4s v0, v1, v2
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: uabal2_4s:
 ; GISEL:       // %bb.0:
@@ -1261,13 +1307,13 @@
 
 ; FALLBACK-NOT: remark:{{.*}} uabal2_2d
 define <2 x i64> @uabal2_2d(ptr %A, ptr %B, ptr %C) nounwind {
-; DAG-LABEL: uabal2_2d:
-; DAG:       // %bb.0:
-; DAG-NEXT:    ldr q0, [x2]
-; DAG-NEXT:    ldr d1, [x0, #8]
-; DAG-NEXT:    ldr d2, [x1, #8]
-; DAG-NEXT:    uabal.2d v0, v1, v2
-; DAG-NEXT:    ret
+; DAG-LABEL: uabal2_2d:
+; DAG:       // %bb.0:
+; DAG-NEXT:    ldr q0, [x2]
+; DAG-NEXT:    ldr d1, [x0, #8]
+; DAG-NEXT:    ldr d2, [x1, #8]
+; DAG-NEXT:    uabal.2d v0, v1, v2
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: uabal2_2d:
 ; GISEL:       // %bb.0:
@@ -1624,12 +1670,18 @@
 
 ; FALLBACK-NOT: remark:{{.*}} uabdl_from_extract_dup
 define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-LABEL: uabdl_from_extract_dup:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    dup.2s v1, w0
+; DAG-LABEL: uabdl_from_extract_dup:
+; DAG:       // %bb.0:
+; DAG-NEXT:    dup.2s v1, w0
+; DAG-NEXT:    uabdl.2d v0, v0, v1
+; DAG-NEXT:    ret
+;
+; GISEL-LABEL: uabdl_from_extract_dup:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    dup.2s v1, w0
 ; GISEL-NEXT:    ext.16b v0, v0, v0, #0
-; CHECK-NEXT:    uabdl.2d v0, v0, v1
-; CHECK-NEXT:    ret
+; GISEL-NEXT:    uabdl.2d v0, v0, v1
+; GISEL-NEXT:    ret
   %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
   %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
@@ -1642,11 +1694,11 @@
 
 ; FALLBACK-NOT: remark:{{.*}} uabdl2_from_extract_dup
 define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; DAG-LABEL: uabdl2_from_extract_dup:
-; DAG:       // %bb.0:
-; DAG-NEXT:    dup.4s v1, w0
-; DAG-NEXT:    uabdl2.2d v0, v0, v1
-; DAG-NEXT:    ret
+; DAG-LABEL: uabdl2_from_extract_dup:
+; DAG:       // %bb.0:
+; DAG-NEXT:    dup.4s v1, w0
+; DAG-NEXT:    uabdl2.2d v0, v0, v1
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: uabdl2_from_extract_dup:
 ; GISEL:       // %bb.0:
@@ -1666,12 +1718,18 @@
 
 ; FALLBACK-NOT: remark:{{.*}} sabdl_from_extract_dup
 define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-LABEL: sabdl_from_extract_dup:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    dup.2s v1, w0
+; DAG-LABEL: sabdl_from_extract_dup:
+; DAG:       // %bb.0:
+; DAG-NEXT:    dup.2s v1, w0
+; DAG-NEXT:    sabdl.2d v0, v0, v1
+; DAG-NEXT:    ret
+;
+; GISEL-LABEL: sabdl_from_extract_dup:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    dup.2s v1, w0
 ; GISEL-NEXT:    ext.16b v0, v0, v0, #0
-; CHECK-NEXT:    sabdl.2d v0, v0, v1
-; CHECK-NEXT:    ret
+; GISEL-NEXT:    sabdl.2d v0, v0, v1
+; GISEL-NEXT:    ret
   %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
   %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
@@ -1684,11 +1742,11 @@
 
 ; FALLBACK-NOT: remark:{{.*}} sabdl2_from_extract_dup
 define <2 x i64> @sabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; DAG-LABEL: sabdl2_from_extract_dup:
-; DAG:       // %bb.0:
-; DAG-NEXT:    dup.4s v1, w0
-; DAG-NEXT:    sabdl2.2d v0, v0, v1
-; DAG-NEXT:    ret
+; DAG-LABEL: sabdl2_from_extract_dup:
+; DAG:       // %bb.0:
+; DAG-NEXT:    dup.4s v1, w0
+; DAG-NEXT:    sabdl2.2d v0, v0, v1
+; DAG-NEXT:    ret
 ;
 ; GISEL-LABEL: sabdl2_from_extract_dup:
 ; GISEL:       // %bb.0: