Index: llvm/trunk/include/llvm/CodeGen/GlobalISel/IRTranslator.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Types.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/IR/Intrinsics.h"
 #include <memory>
 #include <utility>
@@ -63,9 +64,83 @@
   /// Interface used to lower everything related to calls.
   const CallLowering *CLI;

-  /// Mapping of the values of the current LLVM IR function
-  /// to the related virtual registers.
-  ValueToVReg ValToVReg;
+  /// This class contains the mapping between the Values to vreg related data.
+  class ValueToVRegInfo {
+  public:
+    ValueToVRegInfo() = default;
+
+    using VRegListT = SmallVector<unsigned, 1>;
+    using OffsetListT = SmallVector<uint64_t, 1>;
+
+    using const_vreg_iterator =
+        DenseMap<const Value *, VRegListT *>::const_iterator;
+    using const_offset_iterator =
+        DenseMap<const Type *, OffsetListT *>::const_iterator;
+
+    inline const_vreg_iterator vregs_end() const { return ValToVRegs.end(); }
+
+    VRegListT *getVRegs(const Value &V) {
+      auto It = ValToVRegs.find(&V);
+      if (It != ValToVRegs.end())
+        return It->second;
+
+      return insertVRegs(V);
+    }
+
+    OffsetListT *getOffsets(const Value &V) {
+      auto It = TypeToOffsets.find(V.getType());
+      if (It != TypeToOffsets.end())
+        return It->second;
+
+      return insertOffsets(V);
+    }
+
+    const_vreg_iterator findVRegs(const Value &V) const {
+      return ValToVRegs.find(&V);
+    }
+
+    bool contains(const Value &V) const {
+      return ValToVRegs.find(&V) != ValToVRegs.end();
+    }
+
+    void reset() {
+      ValToVRegs.clear();
+      TypeToOffsets.clear();
+      VRegAlloc.DestroyAll();
+      OffsetAlloc.DestroyAll();
+    }
+
+  private:
+    VRegListT *insertVRegs(const Value &V) {
+      assert(ValToVRegs.find(&V) == ValToVRegs.end() && "Value already exists");
+
+      // We placement new using our fast allocator since we never try to free
+      // the vectors until translation is finished.
+      auto *VRegList = new (VRegAlloc.Allocate()) VRegListT();
+      ValToVRegs[&V] = VRegList;
+      return VRegList;
+    }
+
+    OffsetListT *insertOffsets(const Value &V) {
+      assert(TypeToOffsets.find(V.getType()) == TypeToOffsets.end() &&
+             "Type already exists");
+
+      auto *OffsetList = new (OffsetAlloc.Allocate()) OffsetListT();
+      TypeToOffsets[V.getType()] = OffsetList;
+      return OffsetList;
+    }
+    SpecificBumpPtrAllocator<VRegListT> VRegAlloc;
+    SpecificBumpPtrAllocator<OffsetListT> OffsetAlloc;
+
+    // We store pointers to vectors here since references may be invalidated
+    // while we hold them if we stored the vectors directly.
+    DenseMap<const Value *, VRegListT *> ValToVRegs;
+    DenseMap<const Type *, OffsetListT *> TypeToOffsets;
+  };
+
+  /// Mapping of the values of the current LLVM IR function to the related
+  /// virtual registers and offsets.
+  ValueToVRegInfo VMap;
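[Illustration, not part of the patch: the maps above deliberately hold pointers to bump-allocated vectors rather than the vectors themselves. A DenseMap moves its buckets when it rehashes, so a reference to an inline SmallVector value would dangle; a pointer to a SpecificBumpPtrAllocator-owned vector stays stable. Stand-alone sketch with invented key/names:]

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Support/Allocator.h"
    using namespace llvm;

    int main() {
      using ListT = SmallVector<unsigned, 1>;
      SpecificBumpPtrAllocator<ListT> Alloc;
      DenseMap<int, ListT *> Map;

      // Placement-new each list from the bump allocator: the map may rehash
      // and move its buckets as it grows, but the ListT objects never move,
      // so a ListT* handed out earlier stays valid.
      ListT *First = new (Alloc.Allocate()) ListT();
      Map[0] = First;
      for (int I = 1; I < 1000; ++I) // force several rehashes
        Map[I] = new (Alloc.Allocate()) ListT();
      First->push_back(42);          // still safe to use
      Alloc.DestroyAll();            // run all ListT destructors at once
    }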
  // N.b. it's not completely obvious that this will be sufficient for every
  // LLVM IR construct (with "invoke" being the obvious candidate to mess up our
@@ -82,7 +157,8 @@

   // List of stubbed PHI instructions, for values and basic blocks to be filled
   // in once all MachineBasicBlocks have been created.
-  SmallVector<std::pair<const PHINode *, MachineInstr *>, 4> PendingPHIs;
+  SmallVector<std::pair<const PHINode *, SmallVector<MachineInstr *, 1>>, 4>
+      PendingPHIs;

   /// Record of what frame index has been allocated to specified allocas for
   /// this function.
@@ -99,7 +175,7 @@
   /// The general algorithm is:
   /// 1. Look for a virtual register for each operand or
   ///      create one.
-  /// 2 Update the ValToVReg accordingly.
+  /// 2 Update the VMap accordingly.
   /// 2.alt. For constant arguments, if they are compile time constants,
   ///      produce an immediate in the right operand and do not touch
   ///      ValToReg. Actually we will go with a virtual register for each
@@ -146,6 +222,19 @@

   bool translateInlineAsm(const CallInst &CI, MachineIRBuilder &MIRBuilder);

+  // FIXME: temporary function to expose previous interface to call lowering
+  // until it is refactored.
+  /// Combines all component registers of \p V into a single scalar with size
+  /// "max(Offsets) + last size".
+  unsigned packRegs(const Value &V, MachineIRBuilder &MIRBuilder);
+
+  void unpackRegs(const Value &V, unsigned Src, MachineIRBuilder &MIRBuilder);
+
+  /// Returns true if the value should be split into multiple LLTs.
+  /// If \p Offsets is given then the split type's offsets will be stored in it.
+  bool valueIsSplit(const Value &V,
+                    SmallVectorImpl<uint64_t> *Offsets = nullptr);
+
   /// Translate call instruction.
   /// \pre \p U is a call instruction.
   bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
@@ -381,9 +470,24 @@
   // * Clear the different maps.
   void finalizeFunction();

-  /// Get the VReg that represents \p Val.
-  /// If such VReg does not exist, it is created.
-  unsigned getOrCreateVReg(const Value &Val);
+  /// Get the VRegs that represent \p Val.
+  /// Non-aggregate types have just one corresponding VReg and the list can be
+  /// used as a single "unsigned". Aggregates get flattened. If such VRegs do
+  /// not exist, they are created.
+  ArrayRef<unsigned> getOrCreateVRegs(const Value &Val);
+
+  unsigned getOrCreateVReg(const Value &Val) {
+    auto Regs = getOrCreateVRegs(Val);
+    if (Regs.empty())
+      return 0;
+    assert(Regs.size() == 1 &&
+           "attempt to get single VReg for aggregate or void");
+    return Regs[0];
+  }
+
+  /// Allocate some vregs and offsets in the VMap. Then populate just the
+  /// offsets while leaving the vregs empty.
+  ValueToVRegInfo::VRegListT &allocateVRegs(const Value &Val);

   /// Get the frame index that represents \p Val.
   /// If such VReg does not exist, it is created.
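[Illustration of the "max(Offsets) + last size" rule from the packRegs doc comment above (hypothetical helper, not from the patch): for { i8, i32 } the component bit offsets are {0, 32} and the last component is 32 bits wide, so the packed scalar is 32 + 32 = 64 bits — the same s64 the old single-vreg scheme used for the whole struct.]

    #include <cstdint>
    #include <vector>

    // Sizes and offsets are in bits; Offsets is sorted ascending, so the
    // last component is the one at max(Offsets).
    uint64_t packedSizeInBits(const std::vector<uint64_t> &Offsets,
                              const std::vector<uint64_t> &Sizes) {
      return Offsets.back() + Sizes.back();
    }

    int main() {
      // { i8, i32 }: s8 at bit 0, s32 at bit 32 -> packed scalar is s64.
      return packedSizeInBits({0, 32}, {8, 32}) == 64 ? 0 : 1;
    }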
Index: llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -106,33 +106,97 @@
   MachineFunctionPass::getAnalysisUsage(AU);
 }

-unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
-  unsigned &ValReg = ValToVReg[&Val];
-
-  if (ValReg)
-    return ValReg;
+static void computeValueLLTs(const DataLayout &DL, Type &Ty,
+                             SmallVectorImpl<LLT> &ValueTys,
+                             SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                             uint64_t StartingOffset = 0) {
+  // Given a struct type, recursively traverse the elements.
+  if (StructType *STy = dyn_cast<StructType>(&Ty)) {
+    const StructLayout *SL = DL.getStructLayout(STy);
+    for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
+      computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
+                       StartingOffset + SL->getElementOffset(I));
+    return;
+  }
+  // Given an array type, recursively traverse the elements.
+  if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
+    Type *EltTy = ATy->getElementType();
+    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+      computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
+                       StartingOffset + i * EltSize);
+    return;
+  }
+  // Interpret void as zero return values.
+  if (Ty.isVoidTy())
+    return;
+  // Base case: we can get an LLT for this LLVM IR type.
+  ValueTys.push_back(getLLTForType(Ty, DL));
+  if (Offsets != nullptr)
+    Offsets->push_back(StartingOffset * 8);
+}
+
+IRTranslator::ValueToVRegInfo::VRegListT &
+IRTranslator::allocateVRegs(const Value &Val) {
+  assert(!VMap.contains(Val) && "Value already allocated in VMap");
+  auto *Regs = VMap.getVRegs(Val);
+  auto *Offsets = VMap.getOffsets(Val);
+  SmallVector<LLT, 4> SplitTys;
+  computeValueLLTs(*DL, *Val.getType(), SplitTys,
+                   Offsets->empty() ? Offsets : nullptr);
+  for (unsigned i = 0; i < SplitTys.size(); ++i)
+    Regs->push_back(0);
+  return *Regs;
+}
+
+ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
+  auto VRegsIt = VMap.findVRegs(Val);
+  if (VRegsIt != VMap.vregs_end())
+    return *VRegsIt->second;
+
+  if (Val.getType()->isVoidTy())
+    return *VMap.getVRegs(Val);
+
+  // Create entry for this type.
+  auto *VRegs = VMap.getVRegs(Val);
+  auto *Offsets = VMap.getOffsets(Val);

-  // Fill ValRegsSequence with the sequence of registers
-  // we need to concat together to produce the value.
   assert(Val.getType()->isSized() &&
          "Don't know how to create an empty vreg");
-  unsigned VReg =
-      MRI->createGenericVirtualRegister(getLLTForType(*Val.getType(), *DL));
-  ValReg = VReg;

-  if (auto CV = dyn_cast<Constant>(&Val)) {
-    bool Success = translate(*CV, VReg);
+  SmallVector<LLT, 4> SplitTys;
+  computeValueLLTs(*DL, *Val.getType(), SplitTys,
+                   Offsets->empty() ? Offsets : nullptr);
+
+  if (!isa<Constant>(Val)) {
+    for (auto Ty : SplitTys)
+      VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
+    return *VRegs;
+  }
+
+  if (Val.getType()->isAggregateType()) {
+    // UndefValue, ConstantAggregateZero
+    auto &C = cast<Constant>(Val);
+    unsigned Idx = 0;
+    while (auto Elt = C.getAggregateElement(Idx++)) {
+      auto EltRegs = getOrCreateVRegs(*Elt);
+      std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs));
+    }
+  } else {
+    assert(SplitTys.size() == 1 && "unexpectedly split LLT");
+    VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
+    bool Success = translate(cast<Constant>(Val), VRegs->front());
     if (!Success) {
       OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                  MF->getFunction().getSubprogram(),
                                  &MF->getFunction().getEntryBlock());
       R << "unable to translate constant: " << ore::NV("Type", Val.getType());
       reportTranslationError(*MF, *TPC, *ORE, R);
-      return VReg;
+      return *VRegs;
     }
   }

-  return VReg;
+  return *VRegs;
 }

 int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
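[Worked example of what computeValueLLTs records (illustrative; assumes an AArch64-style layout string): for { i8, i32 } the ValueTys come out as {s8, s32} with bit offsets {0, 32}, because StructLayout pads the i32 to byte 4. The snippet reproduces the offset computation with the same LLVM APIs:]

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      // AArch64-style data layout; enough to get realistic padding.
      DataLayout DL("e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128");
      auto *S = StructType::get(Ctx, {Type::getInt8Ty(Ctx), Type::getInt32Ty(Ctx)});
      const StructLayout *SL = DL.getStructLayout(S);
      for (unsigned I = 0; I < S->getNumElements(); ++I)
        outs() << "element " << I << " at bit offset "
               << 8 * SL->getElementOffset(I) << "\n";
      // Prints 0 and 32: the Offsets computeValueLLTs records for { i8, i32 }.
    }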
@@ -243,7 +307,11 @@
   // The target may mess up with the insertion point, but
   // this is not important as a return is the last instruction
   // of the block anyway.
-  return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret));
+
+  // FIXME: this interface should simplify when CallLowering gets adapted to
+  // multiple VRegs per Value.
+  unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0;
+  return CLI->lowerReturn(MIRBuilder, Ret, VReg);
 }

 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -342,15 +410,23 @@
   if (DL->getTypeStoreSize(LI.getType()) == 0)
     return true;

-  unsigned Res = getOrCreateVReg(LI);
-  unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
+  ArrayRef<unsigned> Regs = getOrCreateVRegs(LI);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
+  unsigned Base = getOrCreateVReg(*LI.getPointerOperand());
+
+  for (unsigned i = 0; i < Regs.size(); ++i) {
+    unsigned Addr = 0;
+    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+
+    MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
+    unsigned BaseAlign = getMemOpAlignment(LI);
+    auto MMO = MF->getMachineMemOperand(
+        Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
+        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+        LI.getSyncScopeID(), LI.getOrdering());
+    MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
+  }

-  MIRBuilder.buildLoad(
-      Res, Addr,
-      *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
-                                Flags, DL->getTypeStoreSize(LI.getType()),
-                                getMemOpAlignment(LI), AAMDNodes(), nullptr,
-                                LI.getSyncScopeID(), LI.getOrdering()));
   return true;
 }

@@ -363,50 +439,61 @@
   if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
     return true;

-  unsigned Val = getOrCreateVReg(*SI.getValueOperand());
-  unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
+  ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand());
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
+  unsigned Base = getOrCreateVReg(*SI.getPointerOperand());

-  MIRBuilder.buildStore(
-      Val, Addr,
-      *MF->getMachineMemOperand(
-          MachinePointerInfo(SI.getPointerOperand()), Flags,
-          DL->getTypeStoreSize(SI.getValueOperand()->getType()),
-          getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(),
-          SI.getOrdering()));
+  for (unsigned i = 0; i < Vals.size(); ++i) {
+    unsigned Addr = 0;
+    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+
+    MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
+    unsigned BaseAlign = getMemOpAlignment(SI);
+    auto MMO = MF->getMachineMemOperand(
+        Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8,
+        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+        SI.getSyncScopeID(), SI.getOrdering());
+    MIRBuilder.buildStore(Vals[i], Addr, *MMO);
+  }
   return true;
 }
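[Illustration only: the per-component alignment in the split load/store loops above comes from MinAlign(BaseAlign, Offsets[i] / 8), the largest power of two dividing both the base alignment and the byte offset. A small sanity check of that rule:]

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    int main() {
      // The component at offset 0 keeps the full base alignment.
      assert(llvm::MinAlign(4, 0) == 4);
      // An align-4 base with a component at +4 stays align 4.
      assert(llvm::MinAlign(4, 4) == 4);
      // An align-8 base with a component at +4 drops to align 4.
      assert(llvm::MinAlign(8, 4) == 4);
      return 0;
    }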
-bool IRTranslator::translateExtractValue(const User &U,
-                                         MachineIRBuilder &MIRBuilder) {
+static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
   const Value *Src = U.getOperand(0);
   Type *Int32Ty = Type::getInt32Ty(U.getContext());
-  SmallVector<Value *, 1> Indices;
-
-  // If Src is a single element ConstantStruct, translate extractvalue
-  // to that element to avoid inserting a cast instruction.
-  if (auto CS = dyn_cast<ConstantStruct>(Src))
-    if (CS->getNumOperands() == 1) {
-      unsigned Res = getOrCreateVReg(*CS->getOperand(0));
-      ValToVReg[&U] = Res;
-      return true;
-    }

   // getIndexedOffsetInType is designed for GEPs, so the first index is the
   // usual array element rather than looking into the actual aggregate.
+  SmallVector<Value *, 1> Indices;
   Indices.push_back(ConstantInt::get(Int32Ty, 0));

   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
     for (auto Idx : EVI->indices())
       Indices.push_back(ConstantInt::get(Int32Ty, Idx));
+  } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
+    for (auto Idx : IVI->indices())
+      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
   } else {
     for (unsigned i = 1; i < U.getNumOperands(); ++i)
       Indices.push_back(U.getOperand(i));
   }

-  uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
+  return 8 * static_cast<uint64_t>(
+                 DL.getIndexedOffsetInType(Src->getType(), Indices));
+}

-  unsigned Res = getOrCreateVReg(U);
-  MIRBuilder.buildExtract(Res, getOrCreateVReg(*Src), Offset);
+bool IRTranslator::translateExtractValue(const User &U,
+                                         MachineIRBuilder &MIRBuilder) {
+  const Value *Src = U.getOperand(0);
+  uint64_t Offset = getOffsetFromIndices(U, *DL);
+  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
+  unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
+                 Offsets.begin();
+  auto &DstRegs = allocateVRegs(U);
+
+  for (unsigned i = 0; i < DstRegs.size(); ++i)
+    DstRegs[i] = SrcRegs[Idx++];

   return true;
 }
@@ -414,37 +501,33 @@
 bool IRTranslator::translateInsertValue(const User &U,
                                         MachineIRBuilder &MIRBuilder) {
   const Value *Src = U.getOperand(0);
-  Type *Int32Ty = Type::getInt32Ty(U.getContext());
-  SmallVector<Value *, 1> Indices;
-
-  // getIndexedOffsetInType is designed for GEPs, so the first index is the
-  // usual array element rather than looking into the actual aggregate.
-  Indices.push_back(ConstantInt::get(Int32Ty, 0));
-
-  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
-    for (auto Idx : IVI->indices())
-      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
-  } else {
-    for (unsigned i = 2; i < U.getNumOperands(); ++i)
-      Indices.push_back(U.getOperand(i));
+  uint64_t Offset = getOffsetFromIndices(U, *DL);
+  auto &DstRegs = allocateVRegs(U);
+  ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
+  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+  ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
+  auto InsertedIt = InsertedRegs.begin();
+
+  for (unsigned i = 0; i < DstRegs.size(); ++i) {
+    if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
+      DstRegs[i] = *InsertedIt++;
+    else
+      DstRegs[i] = SrcRegs[i];
   }

-  uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
-
-  unsigned Res = getOrCreateVReg(U);
-  unsigned Inserted = getOrCreateVReg(*U.getOperand(1));
-  MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset);
-
   return true;
 }

 bool IRTranslator::translateSelect(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
-  unsigned Res = getOrCreateVReg(U);
   unsigned Tst = getOrCreateVReg(*U.getOperand(0));
-  unsigned Op0 = getOrCreateVReg(*U.getOperand(1));
-  unsigned Op1 = getOrCreateVReg(*U.getOperand(2));
-  MIRBuilder.buildSelect(Res, Tst, Op0, Op1);
+  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U);
+  ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
+  ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
+
+  for (unsigned i = 0; i < ResRegs.size(); ++i)
+    MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
+
   return true;
 }
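[Stand-alone model (invented data) of how translateExtractValue above locates the first component register of the extracted member: a binary search over the source aggregate's sorted list of bit offsets, after which components are copied pointwise:]

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    int main() {
      // Offsets for { i8, i32, { i64, i64 } } under a typical 64-bit layout:
      // s8 at bit 0, s32 at bit 32, the two i64s at bits 64 and 128.
      std::vector<uint64_t> Offsets = {0, 32, 64, 128};
      uint64_t MemberOffset = 64; // extractvalue index 2 (the inner struct)
      unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(),
                                      MemberOffset) -
                     Offsets.begin();
      // Components [Idx, Idx + 1] belong to the extracted { i64, i64 }.
      return Idx == 2 ? 0 : 1;
    }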
@@ -453,15 +536,16 @@
   // If we're bitcasting to the source type, we can reuse the source vreg.
   if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
       getLLTForType(*U.getType(), *DL)) {
-    // Get the source vreg now, to avoid invalidating ValToVReg.
     unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
-    unsigned &Reg = ValToVReg[&U];
+    auto &Regs = *VMap.getVRegs(U);
     // If we already assigned a vreg for this bitcast, we can't change that.
     // Emit a copy to satisfy the users we already emitted.
-    if (Reg)
-      MIRBuilder.buildCopy(Reg, SrcReg);
-    else
-      Reg = SrcReg;
+    if (!Regs.empty())
+      MIRBuilder.buildCopy(Regs[0], SrcReg);
+    else {
+      Regs.push_back(SrcReg);
+      VMap.getOffsets(U)->push_back(0);
+    }
     return true;
   }
   return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
@@ -612,14 +696,10 @@
 bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                               MachineIRBuilder &MIRBuilder) {
-  LLT Ty = getLLTForType(*CI.getOperand(0)->getType(), *DL);
-  LLT s1 = LLT::scalar(1);
-  unsigned Width = Ty.getSizeInBits();
-  unsigned Res = MRI->createGenericVirtualRegister(Ty);
-  unsigned Overflow = MRI->createGenericVirtualRegister(s1);
+  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
   auto MIB = MIRBuilder.buildInstr(Op)
-                 .addDef(Res)
-                 .addDef(Overflow)
+                 .addDef(ResRegs[0])
+                 .addDef(ResRegs[1])
                  .addUse(getOrCreateVReg(*CI.getOperand(0)))
                  .addUse(getOrCreateVReg(*CI.getOperand(1)));

@@ -629,7 +709,6 @@
     MIB.addUse(Zero);
   }

-  MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width});
   return true;
 }

@@ -836,6 +915,34 @@
   return true;
 }

+unsigned IRTranslator::packRegs(const Value &V,
+                                MachineIRBuilder &MIRBuilder) {
+  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
+  LLT BigTy = getLLTForType(*V.getType(), *DL);
+
+  if (Regs.size() == 1)
+    return Regs[0];
+
+  unsigned Dst = MRI->createGenericVirtualRegister(BigTy);
+  MIRBuilder.buildUndef(Dst);
+  for (unsigned i = 0; i < Regs.size(); ++i) {
+    unsigned NewDst = MRI->createGenericVirtualRegister(BigTy);
+    MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]);
+    Dst = NewDst;
+  }
+  return Dst;
+}
+
+void IRTranslator::unpackRegs(const Value &V, unsigned Src,
+                              MachineIRBuilder &MIRBuilder) {
+  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
+
+  for (unsigned i = 0; i < Regs.size(); ++i)
+    MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]);
+}
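[Illustration only: the value-level effect of packRegs/unpackRegs, modeled on plain integers. pack/unpack here are invented stand-ins for the G_IMPLICIT_DEF + G_INSERT chain and the matching G_EXTRACTs:]

    #include <cassert>
    #include <cstdint>

    uint64_t pack(uint8_t A, uint32_t B) { // components of { i8, i32 }
      uint64_t Wide = 0;                   // G_IMPLICIT_DEF stand-in
      Wide |= uint64_t(A) << 0;            // G_INSERT %wide, %a, 0
      Wide |= uint64_t(B) << 32;           // G_INSERT %wide, %b, 32
      return Wide;
    }

    int main() {
      uint64_t W = pack(0x7f, 0xabcd);
      // unpackRegs: extract each component back at its bit offset.
      assert(uint8_t(W) == 0x7f && uint32_t(W >> 32) == 0xabcd);
      return 0;
    }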
+
 bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   const CallInst &CI = cast<CallInst>(U);
   auto TII = MF->getTarget().getIntrinsicInfo();
@@ -855,16 +962,24 @@
     ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
   }

+  bool IsSplitType = valueIsSplit(CI);
   if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
-    unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+    unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
+                                     getLLTForType(*CI.getType(), *DL))
+                               : getOrCreateVReg(CI);
+
     SmallVector<unsigned, 8> Args;
     for (auto &Arg: CI.arg_operands())
-      Args.push_back(getOrCreateVReg(*Arg));
+      Args.push_back(packRegs(*Arg, MIRBuilder));

     MF->getFrameInfo().setHasCalls(true);
-    return CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
+    bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
       return getOrCreateVReg(*CI.getCalledValue());
     });
+
+    if (IsSplitType)
+      unpackRegs(CI, Res, MIRBuilder);
+    return Success;
   }

   assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
@@ -872,7 +987,14 @@
   if (translateKnownIntrinsic(CI, ID, MIRBuilder))
     return true;

-  unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+  unsigned Res = 0;
+  if (!CI.getType()->isVoidTy()) {
+    if (IsSplitType)
+      Res =
+          MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
+    else
+      Res = getOrCreateVReg(CI);
+  }

   MachineInstrBuilder MIB =
       MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
@@ -880,9 +1002,12 @@
     // Some intrinsics take metadata parameters. Reject them.
     if (isa<MetadataAsValue>(Arg))
       return false;
-    MIB.addUse(getOrCreateVReg(*Arg));
+    MIB.addUse(packRegs(*Arg, MIRBuilder));
   }

+  if (IsSplitType)
+    unpackRegs(CI, Res, MIRBuilder);
+
   // Add a MachineMemOperand if it is a target mem intrinsic.
   const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
   TargetLowering::IntrinsicInfo Info;
@@ -926,15 +1051,18 @@
   MCSymbol *BeginSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);

-  unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I);
+  unsigned Res =
+      MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
   SmallVector<unsigned, 8> Args;
   for (auto &Arg: I.arg_operands())
-    Args.push_back(getOrCreateVReg(*Arg));
+    Args.push_back(packRegs(*Arg, MIRBuilder));

   if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
                       [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
     return false;

+  unpackRegs(I, Res, MIRBuilder);
+
   MCSymbol *EndSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);

@@ -993,27 +1121,18 @@
     return false;

   MBB.addLiveIn(ExceptionReg);
-  unsigned VReg = MRI->createGenericVirtualRegister(Tys[0]),
-           Tmp = MRI->createGenericVirtualRegister(Ty);
-  MIRBuilder.buildCopy(VReg, ExceptionReg);
-  MIRBuilder.buildInsert(Tmp, Undef, VReg, 0);
+  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP);
+  MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);

   unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
   if (!SelectorReg)
     return false;

   MBB.addLiveIn(SelectorReg);
-
-  // N.b. the exception selector register always has pointer type and may not
-  // match the actual IR-level type in the landingpad so an extra cast is
-  // needed.
   unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
   MIRBuilder.buildCopy(PtrVReg, SelectorReg);
+  MIRBuilder.buildCast(ResRegs[1], PtrVReg);

-  VReg = MRI->createGenericVirtualRegister(Tys[1]);
-  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(VReg).addUse(PtrVReg);
-  MIRBuilder.buildInsert(getOrCreateVReg(LP), Tmp, VReg,
-                         Tys[0].getSizeInBits());
   return true;
 }

@@ -1103,9 +1222,16 @@
   // not a legal vector type in LLT.
   if (U.getType()->getVectorNumElements() == 1) {
     unsigned Elt = getOrCreateVReg(*U.getOperand(1));
-    ValToVReg[&U] = Elt;
+    auto &Regs = *VMap.getVRegs(U);
+    if (Regs.empty()) {
+      Regs.push_back(Elt);
+      VMap.getOffsets(U)->push_back(0);
+    } else {
+      MIRBuilder.buildCopy(Regs[0], Elt);
+    }
     return true;
   }
+
   unsigned Res = getOrCreateVReg(U);
   unsigned Val = getOrCreateVReg(*U.getOperand(0));
   unsigned Elt = getOrCreateVReg(*U.getOperand(1));
@@ -1120,7 +1246,13 @@
   // not a legal vector type in LLT.
   if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
     unsigned Elt = getOrCreateVReg(*U.getOperand(0));
-    ValToVReg[&U] = Elt;
+    auto &Regs = *VMap.getVRegs(U);
+    if (Regs.empty()) {
+      Regs.push_back(Elt);
+      VMap.getOffsets(U)->push_back(0);
+    } else {
+      MIRBuilder.buildCopy(Regs[0], Elt);
+    }
     return true;
   }
   unsigned Res = getOrCreateVReg(U);
@@ -1142,17 +1274,21 @@
 bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
   const PHINode &PI = cast<PHINode>(U);

-  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
-  MIB.addDef(getOrCreateVReg(PI));
-  PendingPHIs.emplace_back(&PI, MIB.getInstr());
+  SmallVector<MachineInstr *, 4> Insts;
+  for (auto Reg : getOrCreateVRegs(PI)) {
+    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg);
+    Insts.push_back(MIB.getInstr());
+  }
+
+  PendingPHIs.emplace_back(&PI, std::move(Insts));
   return true;
 }

 void IRTranslator::finishPendingPhis() {
-  for (std::pair<const PHINode *, MachineInstr *> &Phi : PendingPHIs) {
+  for (auto &Phi : PendingPHIs) {
     const PHINode *PI = Phi.first;
-    MachineInstrBuilder MIB(*MF, Phi.second);
+    ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;

     // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
     // won't create extra control flow here, otherwise we need to find the
@@ -1166,17 +1302,27 @@
         continue;

       HandledPreds.insert(IRPred);
-      unsigned ValReg = getOrCreateVReg(*PI->getIncomingValue(i));
+      ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
       for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
-        assert(Pred->isSuccessor(MIB->getParent()) &&
+        assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&
                "incorrect CFG at MachineBasicBlock level");
-        MIB.addUse(ValReg);
-        MIB.addMBB(Pred);
+        for (unsigned j = 0; j < ValRegs.size(); ++j) {
+          MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
+          MIB.addUse(ValRegs[j]);
+          MIB.addMBB(Pred);
+        }
       }
     }
   }
 }

+bool IRTranslator::valueIsSplit(const Value &V,
+                                SmallVectorImpl<uint64_t> *Offsets) {
+  SmallVector<LLT, 4> SplitTys;
+  computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
+  return SplitTys.size() > 1;
+}
+
 bool IRTranslator::translate(const Instruction &Inst) {
   CurBuilder.setDebugLoc(Inst.getDebugLoc());
   switch(Inst.getOpcode()) {
@@ -1235,23 +1381,6 @@
     default:
       return false;
     }
-  } else if (auto CS = dyn_cast<ConstantStruct>(&C)) {
-    // Return the element if it is a single element ConstantStruct.
-    if (CS->getNumOperands() == 1) {
-      unsigned EltReg = getOrCreateVReg(*CS->getOperand(0));
-      EntryBuilder.buildCast(Reg, EltReg);
-      return true;
-    }
-    SmallVector<unsigned, 4> Ops;
-    SmallVector<uint64_t, 4> Indices;
-    uint64_t Offset = 0;
-    for (unsigned i = 0; i < CS->getNumOperands(); ++i) {
-      unsigned OpReg = getOrCreateVReg(*CS->getOperand(i));
-      Ops.push_back(OpReg);
-      Indices.push_back(Offset);
-      Offset += MRI->getType(OpReg).getSizeInBits();
-    }
-    EntryBuilder.buildSequence(Reg, Ops, Indices);
   } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
     if (CV->getNumOperands() == 1)
       return translate(*CV->getOperand(0), Reg);
@@ -1270,7 +1399,7 @@
   // Release the memory used by the different maps we
   // needed during the translation.
   PendingPHIs.clear();
-  ValToVReg.clear();
+  VMap.reset();
   FrameIndices.clear();
   MachinePreds.clear();
   // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
@@ -1330,8 +1459,10 @@
   for (const Argument &Arg: F.args()) {
     if (DL->getTypeStoreSize(Arg.getType()) == 0)
       continue; // Don't handle zero sized types.
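[Minimal model (invented types/names) of the valueIsSplit decision the formal-argument loop below relies on: only values whose type flattens to more than one component are "split" and routed through unpackRegs; a plain scalar's ABI vreg is recorded directly, avoiding a redundant copy:]

    #include <vector>

    struct FlatType { std::vector<unsigned> ComponentBits; };

    // Mirrors "SplitTys.size() > 1" in IRTranslator::valueIsSplit.
    static bool valueIsSplitModel(const FlatType &T) {
      return T.ComponentBits.size() > 1;
    }

    int main() {
      FlatType I64{{64}};     // plain scalar: reuse the incoming vreg directly
      FlatType Pair{{8, 32}}; // { i8, i32 }: needs unpackRegs
      return (!valueIsSplitModel(I64) && valueIsSplitModel(Pair)) ? 0 : 1;
    }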
-    VRegArgs.push_back(getOrCreateVReg(Arg));
+    VRegArgs.push_back(
+        MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
   }
+
   if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
     OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                F.getSubprogram(), &F.getEntryBlock());
@@ -1340,14 +1471,28 @@
     return false;
   }

+  auto ArgIt = F.arg_begin();
+  for (auto &VArg : VRegArgs) {
+    // If the argument is an unsplit scalar then don't use unpackRegs to avoid
+    // creating redundant copies.
+    if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
+      auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
+      assert(VRegs.empty() && "VRegs already populated?");
+      VRegs.push_back(VArg);
+    } else {
+      unpackRegs(*ArgIt, VArg, EntryBuilder);
+    }
+    ArgIt++;
+  }
+
   // And translate the function!
-  for (const BasicBlock &BB: F) {
+  for (const BasicBlock &BB : F) {
     MachineBasicBlock &MBB = getMBB(BB);
     // Set the insertion point of all the following translations to
     // the end of this basic block.
     CurBuilder.setMBB(MBB);

-    for (const Instruction &Inst: BB) {
+    for (const Instruction &Inst : BB) {
       if (translate(Inst))
         continue;

Index: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -187,6 +187,9 @@
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
   LLVMContext &Ctx = OrigArg.Ty->getContext();

+  if (OrigArg.Ty->isVoidTy())
+    return;
+
   SmallVector<EVT, 4> SplitVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
Index: llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
@@ -469,7 +469,12 @@
   if (!MBB.empty())
     MIRBuilder.setInstr(*MBB.begin());

-  return handleAssignments(MIRBuilder, ArgInfos, ArgHandler);
+  if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
+    return false;
+
+  // Move back to the end of the basic block.
+  MIRBuilder.setMBB(MBB);
+  return true;
 }

 namespace {
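[Stand-alone sketch (invented names) of the component-PHI bookkeeping the new finishPendingPhis performs, and which tests such as test_phi_diamond below check: a PHI of a split type owns one G_PHI per component, and each predecessor's component vregs are appended to those PHIs in lock step:]

    #include <cstddef>
    #include <utility>
    #include <vector>

    struct PhiModel { std::vector<std::pair<unsigned, int>> IncomingRegAndPred; };

    int main() {
      // A PHI of { i8, i16, i32 } owns three component PHIs.
      std::vector<PhiModel> ComponentPHIs(3);
      // One machine predecessor contributes component vregs {4, 5, 6}.
      std::vector<unsigned> ValRegs = {4, 5, 6};
      int Pred = 1;
      for (std::size_t J = 0; J < ValRegs.size(); ++J)
        ComponentPHIs[J].IncomingRegAndPred.push_back({ValRegs[J], Pred});
      return ComponentPHIs[2].IncomingRegAndPred[0].first == 6 ? 0 : 1;
    }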
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -32,17 +32,6 @@
   ret i128 %res
 }

-; It happens that we don't handle ConstantArray instances yet during
-; translation. Any other constant would be fine too.
-
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate constant: [1 x double] (in function: constant)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for constant
-; FALLBACK-WITH-REPORT-OUT-LABEL: constant:
-; FALLBACK-WITH-REPORT-OUT: fmov d0, #1.0
-define [1 x double] @constant() {
-  ret [1 x double] [double 1.0]
-}
-
 ; The key problem here is that we may fail to create an MBB referenced by a
 ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things
 ; happen.
@@ -185,16 +174,6 @@
   br label %block
 }

-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(s96), %3:_(p0) :: (store 12 into `%struct96* undef`, align 4) (in function: nonpow2_insertvalue_narrowing)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_insertvalue_narrowing
-; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_insertvalue_narrowing:
-%struct96 = type { float, float, float }
-define void @nonpow2_insertvalue_narrowing(float %a) {
-  %dummy = insertvalue %struct96 undef, float %a, 0
-  store %struct96 %dummy, %struct96* undef
-  ret void
-}
-
 ; FALLBACK-WITH-REPORT-ERR remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3, %4 :: (store 12 into `i96* undef`, align 16) (in function: nonpow2_add_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing:
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -709,8 +709,14 @@

 ; CHECK-LABEL: name: test_struct_memops
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[VAL:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, align 4)
-; CHECK: G_STORE [[VAL]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr, align 4)
+; CHECK: [[VAL1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4)
+; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64)
+; CHECK: [[VAL2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.addr + 4)
+; CHECK: G_STORE [[VAL1]](s8), [[ADDR]](p0) :: (store 1 into %ir.addr, align 4)
+; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64)
+; CHECK: G_STORE [[VAL2]](s32), [[GEP2]](p0) :: (store 4 into %ir.addr + 4)
 define void @test_struct_memops({ i8, i32 }* %addr) {
   %val = load { i8, i32 }, { i8, i32 }* %addr
   store { i8, i32 } %val, { i8, i32 }* %addr
@@ -811,10 +817,10 @@
 ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2
 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SADDO [[LHS]], [[RHS]]
-; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
-; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
-; CHECK: G_STORE
[[RES]](s64), [[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -844,10 +850,10 @@ ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SSUBO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 -; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.subr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.subr + 4, align 4) declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -861,10 +867,10 @@ ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[ZERO:%[0-9]+]]:_(s1) = G_CONSTANT i1 false ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_USUBE [[LHS]], [[RHS]], [[ZERO]] -; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 -; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.subr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.subr + 4, align 4) declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { %res = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -877,10 +883,10 @@ ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SMULO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 -; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -893,10 +899,10 @@ ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UMULO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: 
[[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 -; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -905,9 +911,18 @@ } ; CHECK-LABEL: name: test_extractvalue -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD -; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT [[STRUCT]](s128), 64 -; CHECK: $w0 = COPY [[RES]] +; CHECK: %0:_(p0) = COPY $x0 +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64) +; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12) +; CHECK: $w0 = COPY [[LD3]](s32) %struct.nested = type {i8, { i8, i32 }, i32} define i32 @test_extractvalue(%struct.nested* %addr) { %struct = load %struct.nested, %struct.nested* %addr @@ -916,9 +931,22 @@ } ; CHECK-LABEL: name: test_extractvalue_agg -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 32 -; CHECK: G_STORE [[RES]] +; CHECK: %0:_(p0) = COPY $x0 +; CHECK: %1:_(p0) = COPY $x1 +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64) +; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12) +; CHECK: G_STORE [[LD2]](s8), %1(p0) :: (store 1 into %ir.addr2, align 4) +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP %1, [[CST4]](s64) +; CHECK: G_STORE [[LD3]](s32), [[GEP4]](p0) :: (store 4 into %ir.addr2 + 4) define void @test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { %struct = load %struct.nested, %struct.nested* %addr %res = extractvalue %struct.nested %struct, 1 @@ -927,10 +955,28 @@ } ; CHECK-LABEL: name: test_insertvalue -; CHECK: [[VAL:%[0-9]+]]:_(s32) = COPY $w1 -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD -; CHECK: [[NEWSTRUCT:%[0-9]+]]:_(s128) = G_INSERT [[STRUCT]], [[VAL]](s32), 64 -; CHECK: G_STORE [[NEWSTRUCT]](s128), +; CHECK: %0:_(p0) = COPY $x0 +; CHECK: %1:_(s32) = COPY $w1 +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from 
%ir.addr, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64) +; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12) +; CHECK: G_STORE [[LD1]](s8), %0(p0) :: (store 1 into %ir.addr, align 4) +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP %0, [[CST4]](s64) +; CHECK: G_STORE [[LD2]](s8), [[GEP4]](p0) :: (store 1 into %ir.addr + 4, align 4) +; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP %0, [[CST5]](s64) +; CHECK: G_STORE %1(s32), [[GEP5]](p0) :: (store 4 into %ir.addr + 8) +; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP %0, [[CST6]](s64) +; CHECK: G_STORE [[LD4]](s32), [[GEP6]](p0) :: (store 4 into %ir.addr + 12) define void @test_insertvalue(%struct.nested* %addr, i32 %val) { %struct = load %struct.nested, %struct.nested* %addr %newstruct = insertvalue %struct.nested %struct, i32 %val, 1, 1 @@ -942,8 +988,7 @@ ; CHECK-LABEL: name: test_trivial_insert ; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[VAL:%[0-9]+]]:_(s64) = COPY $x1 -; CHECK: [[RES:%[0-9]+]]:_(s64) = COPY [[VAL]](s64) -; CHECK: $x0 = COPY [[RES]] +; CHECK: $x0 = COPY [[VAL]] %res = insertvalue [1 x i64] %s, i64 %val, 0 ret [1 x i64] %res } @@ -952,17 +997,38 @@ ; CHECK-LABEL: name: test_trivial_insert_ptr ; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[VAL:%[0-9]+]]:_(p0) = COPY $x1 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_PTRTOINT [[VAL]](p0) -; CHECK: $x0 = COPY [[RES]] +; CHECK: $x0 = COPY [[VAL]] %res = insertvalue [1 x i8*] %s, i8* %val, 0 ret [1 x i8*] %res } ; CHECK-LABEL: name: test_insertvalue_agg -; CHECK: [[SMALLSTRUCT:%[0-9]+]]:_(s64) = G_LOAD -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD -; CHECK: [[RES:%[0-9]+]]:_(s128) = G_INSERT [[STRUCT]], [[SMALLSTRUCT]](s64), 32 -; CHECK: G_STORE [[RES]](s128) +; CHECK: %0:_(p0) = COPY $x0 +; CHECK: %1:_(p0) = COPY $x1 +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %1(p0) :: (load 1 from %ir.addr2, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %1, [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.addr2 + 4) +; CHECK: [[LD3:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64) +; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[GEP2]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64) +; CHECK: [[LD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP %0, [[CST4]](s64) +; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 4 from %ir.addr + 12) +; CHECK: G_STORE [[LD3]](s8), %0(p0) :: (store 1 into %ir.addr, align 4) +; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP 
%0, [[CST5]](s64) +; CHECK: G_STORE [[LD1]](s8), [[GEP5]](p0) :: (store 1 into %ir.addr + 4, align 4) +; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP %0, [[CST6]](s64) +; CHECK: G_STORE [[LD2]](s32), [[GEP6]](p0) :: (store 4 into %ir.addr + 8) +; CHECK: [[CST7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_GEP %0, [[CST7]](s64) +; CHECK: G_STORE [[LD6]](s32), [[GEP7]](p0) :: (store 4 into %ir.addr + 12) define void @test_insertvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { %smallstruct = load {i8, i32}, {i8, i32}* %addr2 %struct = load %struct.nested, %struct.nested* %addr @@ -1661,3 +1727,135 @@ %val = load %zerosize_type, %zerosize_type* %ptr, align 4 ret %zerosize_type %in } + + +define i64 @test_phi_loop(i32 %n) { +; CHECK-LABEL: name: test_phi_loop +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[CST1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[CST2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + +; CHECK: [[PN1:%[0-9]+]]:_(s32) = G_PHI [[ARG1]](s32), %bb.1, [[SUB:%[0-9]+]](s32), %bb.2 +; CHECK: [[PN2:%[0-9]+]]:_(s64) = G_PHI [[CST3]](s64), %bb.1, [[PN3:%[0-9]+]](s64), %bb.2 +; CHECK: [[PN3]]:_(s64) = G_PHI [[CST4]](s64), %bb.1, [[ADD:%[0-9]+]](s64), %bb.2 +; CHECK: [[ADD]]:_(s64) = G_ADD [[PN2]], [[PN3]] +; CHECK: [[SUB]]:_(s32) = G_SUB [[PN1]], [[CST1]] +; CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PN1]](s32), [[CST2]] +; CHECK: G_BRCOND [[CMP]](s1), %bb.3 +; CHECK: G_BR %bb.2 + +; CHECK: $x0 = COPY [[PN2]](s64) +; CHECK: RET_ReallyLR implicit $x0 +entry: + br label %loop + +loop: + %counter = phi i32 [ %n, %entry ], [ %counter.dec, %loop ] + %elem = phi { i64, i64 } [ { i64 0, i64 1 }, %entry ], [ %updated, %loop ] + %prev = extractvalue { i64, i64 } %elem, 0 + %curr = extractvalue { i64, i64 } %elem, 1 + %next = add i64 %prev, %curr + %shifted = insertvalue { i64, i64 } %elem, i64 %curr, 0 + %updated = insertvalue { i64, i64 } %shifted, i64 %next, 1 + %counter.dec = sub i32 %counter, 1 + %cond = icmp sle i32 %counter, 0 + br i1 %cond, label %exit, label %loop + +exit: + %res = extractvalue { i64, i64 } %elem, 0 + ret i64 %res +} + +define void @test_phi_diamond({ i8, i16, i32 }* %a.ptr, { i8, i16, i32 }* %b.ptr, i1 %selector, { i8, i16, i32 }* %dst) { +; CHECK-LABEL: name: test_phi_diamond +; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[ARG2:%[0-9]+]]:_(p0) = COPY $x1 +; CHECK: [[ARG3:%[0-9]+]]:_(s32) = COPY $w2 +; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ARG3]](s32) +; CHECK: [[ARG4:%[0-9]+]]:_(p0) = COPY $x3 +; CHECK: G_BRCOND [[TRUNC]](s1), %bb.2 +; CHECK: G_BR %bb.3 + +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD [[ARG1]](p0) :: (load 1 from %ir.a.ptr, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ARG1]], [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s16) = G_LOAD [[GEP1]](p0) :: (load 2 from %ir.a.ptr + 2) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ARG1]], [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.a.ptr + 4) +; CHECK: G_BR %bb.4 + +; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[ARG2]](p0) :: (load 1 from %ir.b.ptr, align 4) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[ARG2]], [[CST3]](s64) +; CHECK: [[LD5:%[0-9]+]]:_(s16) = G_LOAD [[GEP3]](p0) :: (load 2 from %ir.b.ptr + 2) +; 
CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[ARG2]], [[CST4]](s64) +; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 4 from %ir.b.ptr + 4) + +; CHECK: [[PN1:%[0-9]+]]:_(s8) = G_PHI [[LD1]](s8), %bb.2, [[LD4]](s8), %bb.3 +; CHECK: [[PN2:%[0-9]+]]:_(s16) = G_PHI [[LD2]](s16), %bb.2, [[LD5]](s16), %bb.3 +; CHECK: [[PN3:%[0-9]+]]:_(s32) = G_PHI [[LD3]](s32), %bb.2, [[LD6]](s32), %bb.3 +; CHECK: G_STORE [[PN1]](s8), [[ARG4]](p0) :: (store 1 into %ir.dst, align 4) +; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[ARG4]], [[CST5]](s64) +; CHECK: G_STORE [[PN2]](s16), [[GEP5]](p0) :: (store 2 into %ir.dst + 2) +; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[ARG4]], [[CST6]](s64) +; CHECK: G_STORE [[PN3]](s32), [[GEP6]](p0) :: (store 4 into %ir.dst + 4) +; CHECK: RET_ReallyLR + +entry: + br i1 %selector, label %store.a, label %store.b + +store.a: + %a = load { i8, i16, i32 }, { i8, i16, i32 }* %a.ptr + br label %join + +store.b: + %b = load { i8, i16, i32 }, { i8, i16, i32 }* %b.ptr + br label %join + +join: + %v = phi { i8, i16, i32 } [ %a, %store.a ], [ %b, %store.b ] + store { i8, i16, i32 } %v, { i8, i16, i32 }* %dst + ret void +} + +%agg.inner.inner = type {i64, i64} +%agg.inner = type {i16, i8, %agg.inner.inner } +%agg.nested = type {i32, i32, %agg.inner, i32} + +define void @test_nested_aggregate_const(%agg.nested *%ptr) { +; CHECK-LABEL: name: test_nested_aggregate_const +; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[CST1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[CST2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 +; CHECK: [[CST3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 +; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[CST6:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 +; CHECK: G_STORE [[CST1]](s32), [[BASE]](p0) :: (store 4 into %ir.ptr, align 8) +; CHECK: [[CST7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST7]](s64) +; CHECK: G_STORE [[CST1]](s32), [[GEP1]](p0) :: (store 4 into %ir.ptr + 4) +; CHECK: [[CST8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST8]](s64) +; CHECK: G_STORE [[CST2]](s16), [[GEP2]](p0) :: (store 2 into %ir.ptr + 8, align 8) +; CHECK: [[CST9:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST9]](s64) +; CHECK: G_STORE [[CST3]](s8), [[GEP3]](p0) :: (store 1 into %ir.ptr + 10, align 2) +; CHECK: [[CST10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST10]](s64) +; CHECK: G_STORE [[CST4]](s64), [[GEP4]](p0) :: (store 8 into %ir.ptr + 16) +; CHECK: [[CST11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST11]](s64) +; CHECK: G_STORE [[CST5]](s64), [[GEP5]](p0) :: (store 8 into %ir.ptr + 24) +; CHECK: [[CST12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST12]](s64) +; CHECK: G_STORE [[CST6]](s32), [[GEP6]](p0) :: (store 4 into %ir.ptr + 32, align 8) + store %agg.nested { i32 1, i32 1, %agg.inner { i16 2, i8 3, %agg.inner.inner {i64 5, i64 8} }, i32 13}, %agg.nested *%ptr + ret void +} Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll +++ 
llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll @@ -55,19 +55,25 @@ } ; CHECK-LABEL: name: test_split_struct -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD {{.*}}(p0) -; CHECK: [[LO:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 0 -; CHECK: [[HI:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 64 +; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP %0, [[CST]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.ptr + 8) +; CHECK: [[IMPDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s128) = G_INSERT [[IMPDEF]], [[LD1]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s128) = G_INSERT [[INS1]], [[LD2]](s64), 64 +; CHECK: [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 0 +; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 64 ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] -; CHECK: G_STORE [[LO]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]](s64) +; CHECK: G_STORE [[EXT1]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] -; CHECK: G_STORE [[HI]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0) +; CHECK: G_STORE [[EXT2]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0) define void @test_split_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* %ptr call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3, Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -70,8 +70,16 @@ ; CHECK: [[ARG1:%[0-9]+]]:_(s192) = G_INSERT [[ARG0]], [[I64]](s64), 64 ; CHECK: [[ARG2:%[0-9]+]]:_(s192) = G_INSERT [[ARG1]], [[I8]](s8), 128 ; CHECK: [[ARG:%[0-9]+]]:_(s192) = COPY [[ARG2]] - -; CHECK: G_STORE [[ARG]](s192), [[ADDR]](p0) +; CHECK: [[EXTA0:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s192), 0 +; CHECK: [[EXTA1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s192), 64 +; CHECK: [[EXTA2:%[0-9]+]]:_(s8) = G_EXTRACT [[ARG]](s192), 128 +; CHECK: G_STORE [[EXTA0]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64) +; CHECK: G_STORE [[EXTA1]](s64), [[GEP1]](p0) :: (store 8 into %ir.addr + 8) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64) +; CHECK: G_STORE [[EXTA2]](s8), [[GEP2]](p0) :: (store 1 into %ir.addr + 16, align 8) ; CHECK: RET_ReallyLR define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) { store {double, i64, i8} %in, {double, i64, i8}* %addr @@ -81,7 +89,18 @@ ; CHECK-LABEL: name: test_struct_return ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[VAL:%[0-9]+]]:_(s192) = G_LOAD [[ADDR]](p0) + +; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.addr + 8) +; CHECK: 
[[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 16, align 8) +; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[LD1]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[LD2]](s64), 64 +; CHECK: [[VAL:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[LD3]](s32), 128 ; CHECK: [[DBL:%[0-9]+]]:_(s64) = G_EXTRACT [[VAL]](s192), 0 ; CHECK: [[I64:%[0-9]+]]:_(s64) = G_EXTRACT [[VAL]](s192), 64 @@ -98,8 +117,22 @@ ; CHECK-LABEL: name: test_arr_call ; CHECK: hasCalls: true -; CHECK: [[ARG:%[0-9]+]]:_(s256) = G_LOAD - +; CHECK: %0:_(p0) = COPY $x0 +; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.addr) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.addr + 8) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s64) = G_LOAD [[GEP2]](p0) :: (load 8 from %ir.addr + 16) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64) +; CHECK: [[LD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP3]](p0) :: (load 8 from %ir.addr + 24) +; CHECK: [[IMPDEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s256) = G_INSERT [[IMPDEF]], [[LD1]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s256) = G_INSERT [[INS1]], [[LD2]](s64), 64 +; CHECK: [[INS3:%[0-9]+]]:_(s256) = G_INSERT [[INS2]], [[LD3]](s64), 128 +; CHECK: [[ARG:%[0-9]+]]:_(s256) = G_INSERT [[INS3]], [[LD4]](s64), 192 ; CHECK: [[E0:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 0 ; CHECK: [[E1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 64 ; CHECK: [[E2:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 128 @@ -240,19 +273,26 @@ } ; CHECK-LABEL: name: test_split_struct -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD {{.*}}(p0) -; CHECK: [[LO:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 0 -; CHECK: [[HI:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 64 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8 from %ir.ptr + 8) + +; CHECK: [[IMPDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s128) = G_INSERT [[IMPDEF]], [[LO]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s128) = G_INSERT [[INS1]], [[HI]](s64), 64 +; CHECK: [[EXTLO:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 0 +; CHECK: [[EXTHI:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 64 ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp -; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] -; CHECK: G_STORE [[LO]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0) - +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[CST2]](s64) +; CHECK: G_STORE [[EXTLO]](s64), [[GEP2]](p0) :: (store 8 into stack, align 0) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp -; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] -; CHECK: G_STORE [[HI]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 8 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[CST3]](s64) +; CHECK: G_STORE [[EXTHI]](s64), [[GEP3]](p0) :: (store 8 into stack + 8, align 0) define void @test_split_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* %ptr call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3, Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll @@ -19,11 +19,11 @@ ; CHECK: [[BAD]].{{[a-z]+}} (landing-pad): ; CHECK: EH_LABEL -; CHECK: [[UNDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF ; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[VAL_WITH_PTR:%[0-9]+]]:_(s128) = G_INSERT [[UNDEF]], [[PTR]](p0), 0 ; CHECK: [[SEL_PTR:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]] +; CHECK: [[UNDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF +; CHECK: [[VAL_WITH_PTR:%[0-9]+]]:_(s128) = G_INSERT [[UNDEF]], [[PTR]](p0), 0 ; CHECK: [[PTR_SEL:%[0-9]+]]:_(s128) = G_INSERT [[VAL_WITH_PTR]], [[SEL]](s32), 64 ; CHECK: [[PTR_RET:%[0-9]+]]:_(s64) = G_EXTRACT [[PTR_SEL]](s128), 0 ; CHECK: [[SEL_RET:%[0-9]+]]:_(s32) = G_EXTRACT [[PTR_SEL]](s128), 64 Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll @@ -16,18 +16,10 @@ ; CHECK: EH_LABEL ; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[STRUCT_PTR:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR]](p0) - ; CHECK: [[SEL_PTR:%[0-9]+]]:_(p0) = COPY $x1 -; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]] -; CHECK: [[STRUCT_SEL:%[0-9]+]]:_(s64) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 0 - -; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[STRUCT_PTR]](s64) -; CHECK: G_STORE [[PTR]](p0), {{%[0-9]+}}(p0) - -; CHECK: [[SEL_TMP:%[0-9]+]]:_(s32) = G_EXTRACT [[STRUCT_SEL]](s64), 0 -; CHECK: [[SEL:%[0-9]+]]:_(s32) = COPY [[SEL_TMP]] -; CHECK: G_STORE [[SEL]](s32), {{%[0-9]+}}(p0) +; CHECK: [[SEL_PTR_INT:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]](p0) +; CHECK: G_STORE [[PTR]](p0), %0(p0) :: (store 8 into %ir.exn.slot) +; CHECK: G_STORE [[SEL_PTR_INT]](s32), %1(p0) :: (store 4 into %ir.ehselector.slot) define void @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { %exn.slot = alloca i8* Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple aarch64 -O0 -stop-after=instruction-select -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s + +%dag = type { { { i8, { i8 } }, { { i8, { i8 } }, { i8 } } }, { { i8, { i8 } }, { i8 } } } + +define void @test_const(%dag* %dst) { + ; CHECK-LABEL: name: test_const + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 10 + ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 20 + ; CHECK: [[MOVi32imm2:%[0-9]+]]:gpr32 = MOVi32imm 50 + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 
into %ir.dst) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store 1 into %ir.dst + 1) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store 1 into %ir.dst + 2) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store 1 into %ir.dst + 3) + ; CHECK: STRBBui [[MOVi32imm2]], [[COPY]], 4 :: (store 1 into %ir.dst + 4) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store 1 into %ir.dst + 5) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store 1 into %ir.dst + 6) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 7 :: (store 1 into %ir.dst + 7) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 into %ir.dst) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store 1 into %ir.dst + 1) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store 1 into %ir.dst + 2) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store 1 into %ir.dst + 3) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 4 :: (store 1 into %ir.dst + 4) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store 1 into %ir.dst + 5) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store 1 into %ir.dst + 6) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 7 :: (store 1 into %ir.dst + 7) + ; CHECK: RET_ReallyLR +entry: + %updated = insertvalue + ; Check that we're visiting constants with shared parts + ; (deduplicated via LLVMContext, forming a proper DAG) correctly: + %dag { + { { i8, { i8 } }, { { i8, { i8 } }, { i8 } } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { { i8, { i8 } }, { i8 } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { i8 } { i8 20 } + } + }, + { { i8, { i8 } }, { i8 } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { i8 } { i8 20 } + } + }, + { { i8, { i8 } }, { i8 } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { i8 } { i8 50 } + }, + 0, + 1 + store %dag %updated, %dag* %dst + ; 10, 20, 10, 20, 50, 10, 20, 20 sequence is expected + + store + ; Check that we didn't overwrite a previously seen constant + ; while processing an insertvalue into it: + %dag { + { { i8, { i8 } }, { { i8, { i8 } }, { i8 } } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { { i8, { i8 } }, { i8 } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { i8 } { i8 20 } + } + }, + { { i8, { i8 } }, { i8 } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { i8 } { i8 20 } + } + }, + %dag* %dst + ; 10, 20, 10, 20, 20, 10, 20, 20 sequence is expected + ret void +} Index: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -522,7 +522,8 @@ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) -; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>) +; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[C3]] %vec = extractvalue %struct.v2s32 {<2 x i32>}, 0 %elt = extractelement <2 x i32> %vec, i32 0 ret i32 %elt @@ -537,12 +538,8 @@ ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 -; CHECK: [[C5:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF -; CHECK: [[C6:%[0-9]+]]:_(s128) = G_INSERT [[C5]], [[VEC]](<2 x s32>), 0 -; CHECK: [[C7:%[0-9]+]]:_(s128) = G_INSERT [[C6]], [[C3]](s32), 64 -; CHECK: 
[[C8:%[0-9]+]]:_(s128) = G_INSERT [[C7]], [[C4]](s32), 96 -; CHECK: [[EXT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[C8]](s128), 0 -; CHECK: G_EXTRACT_VECTOR_ELT [[EXT]](<2 x s32>) +; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[C5]](s32) %vec = extractvalue %struct.v2s32.s32.s32 {<2 x i32>, i32 3, i32 4}, 0 %elt = extractelement <2 x i32> %vec, i32 0 ret i32 %elt Index: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll +++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll @@ -30,7 +30,7 @@ ; CHECK: ADJCALLSTACKDOWN 8, 0, 14, $noreg, implicit-def $sp, implicit $sp ; CHECK-DAG: $r0 = COPY [[BVREG]] ; CHECK-DAG: $r1 = COPY [[AVREG]] -; CHECK-DAG: $r2 = COPY [[BVREG]] +; CHECK-DAG: $r2 = COPY [[BVREG]] ; CHECK-DAG: $r3 = COPY [[AVREG]] ; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -191,8 +191,13 @@ ; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY $r0 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1 ; CHECK: [[ARG_ARR:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) +; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR]](s64), 0 +; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR]](s64), 32 +; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s64) +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64) ; CHECK: $r0 = COPY [[R0]] ; CHECK: $r1 = COPY [[R1]] ; CHECK: BL @tiny_int_arrays_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $r0, implicit-def $r1 @@ -201,7 +206,14 @@ ; CHECK: [[R2:%[0-9]+]]:_(s32) = COPY $r2 ; CHECK: [[RES_ARR:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32) ; CHECK: ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[RES_ARR]](s96) +; CHECK: [[EXT3:%[0-9]+]]:_(s32) = G_EXTRACT [[RES_ARR]](s96), 0 +; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[RES_ARR]](s96), 32 +; CHECK: [[EXT5:%[0-9]+]]:_(s32) = G_EXTRACT [[RES_ARR]](s96), 64 +; CHECK: [[IMPDEF2:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF +; CHECK: [[INS3:%[0-9]+]]:_(s96) = G_INSERT [[IMPDEF2]], [[EXT3]](s32), 0 +; CHECK: [[INS4:%[0-9]+]]:_(s96) = G_INSERT [[INS3]], [[EXT4]](s32), 32 +; CHECK: [[INS5:%[0-9]+]]:_(s96) = G_INSERT [[INS4]], [[EXT5]](s32), 64 +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS5]](s96) ; FIXME: This doesn't seem correct with regard to the AAPCS docs (which say ; that composite types larger than 4 bytes should be passed through memory), ; but it's what DAGISel does. We should fix it in the common code for both.
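; For reference, a minimal reproducer for the composite-passing question in the
; FIXME above (hypothetical @takes_composite/@pass_composite, not one of the
; tests in this patch): [2 x i32] is a composite type larger than 4 bytes, so
; under the AAPCS reading quoted above it would be passed through memory, yet
; both DAGISel and GlobalISel split it across r0/r1 when core registers are
; available.

declare arm_aapcscc void @takes_composite([2 x i32])

define arm_aapcscc void @pass_composite([2 x i32]* %p) {
  %val = load [2 x i32], [2 x i32]* %p
  call arm_aapcscc void @takes_composite([2 x i32] %val)
  ret void
}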
@@ -225,9 +237,19 @@ ; CHECK: [[R3:%[0-9]+]]:_(s32) = COPY $r3 ; CHECK: [[ARG_ARR0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) ; CHECK: [[ARG_ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R2]](s32), [[R3]](s32) +; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR0]](s64), 0 +; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR0]](s64), 32 +; CHECK: [[EXT3:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR1]](s64), 0 +; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR1]](s64), 32 +; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32 +; CHECK: [[IMPDEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[EXT3]](s32), 0 +; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[INS3]], [[EXT4]](s32), 32 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR0]](s64) -; CHECK: [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR1]](s64) +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64) +; CHECK: [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS4]](s64) ; CHECK: $r0 = COPY [[R0]] ; CHECK: $r1 = COPY [[R1]] ; CHECK: $r2 = COPY [[R2]] @@ -259,8 +281,9 @@ ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] ; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] ; CHECK: [[ARG_ARR:%[0-9]+]]:_(s640) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) +; CHECK: [[INS:%[0-9]+]]:_(s640) = G_INSERT {{.*}}, {{.*}}(s32), 608 ; CHECK: ADJCALLSTACKDOWN 64, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s640) +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS]](s640) ; CHECK: $r0 = COPY [[R0]] ; CHECK: $r1 = COPY [[R1]] ; CHECK: $r2 = COPY [[R2]] @@ -301,8 +324,15 @@ ; CHECK: [[ARR2_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]] ; CHECK: [[ARR2:%[0-9]+]]:_(s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]] ; CHECK: [[ARR_MERGED:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[ARR0]](s64), [[ARR1]](s64), [[ARR2]](s64) +; CHECK: [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 0 +; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 64 +; CHECK: [[EXT3:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 128 +; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[EXT1]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[EXT2]](s64), 64 +; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[EXT3]](s64), 128 ; CHECK: ADJCALLSTACKDOWN 8, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[ARR0:%[0-9]+]]:_(s64), [[ARR1:%[0-9]+]]:_(s64), [[ARR2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ARR_MERGED]](s192) +; CHECK: [[ARR0:%[0-9]+]]:_(s64), [[ARR1:%[0-9]+]]:_(s64), 
[[ARR2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[INS3]](s192) ; CHECK: [[ARR0_0:%[0-9]+]]:_(s32), [[ARR0_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR0]](s64) ; LITTLE: $r0 = COPY [[ARR0_0]](s32) ; LITTLE: $r1 = COPY [[ARR0_1]](s32) @@ -322,7 +352,12 @@ ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1 ; CHECK: [[R_MERGED:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) ; CHECK: ADJCALLSTACKUP 8, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R_MERGED]](s64) +; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s64), 0 +; CHECK: [[EXT5:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s64), 32 +; CHECK: [[IMPDEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[EXT4]](s32), 0 +; CHECK: [[INS5:%[0-9]+]]:_(s64) = G_INSERT [[INS4]], [[EXT5]](s32), 32 +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS5]](s64) ; CHECK: $r0 = COPY [[R0]] ; CHECK: $r1 = COPY [[R1]] ; CHECK: BX_RET 14, $noreg, implicit $r0, implicit $r1 @@ -358,10 +393,33 @@ ; CHECK: [[X_ARR:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[X0]](s64), [[X1]](s64), [[X2]](s64) ; CHECK: [[Y_ARR:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32), [[Y2]](s32) ; CHECK: [[Z_ARR:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[Z0]](s64), [[Z1]](s64), [[Z2]](s64), [[Z3]](s64) +; CHECK: [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 0 +; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 64 +; CHECK: [[EXT3:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 128 +; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 0 +; CHECK: [[EXT5:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 32 +; CHECK: [[EXT6:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 64 +; CHECK: [[EXT7:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 0 +; CHECK: [[EXT8:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 64 +; CHECK: [[EXT9:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 128 +; CHECK: [[EXT10:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 192 +; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[EXT1]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[EXT2]](s64), 64 +; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[EXT3]](s64), 128 +; CHECK: [[IMPDEF2:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF +; CHECK: [[INS4:%[0-9]+]]:_(s96) = G_INSERT [[IMPDEF2]], [[EXT4]](s32), 0 +; CHECK: [[INS5:%[0-9]+]]:_(s96) = G_INSERT [[INS4]], [[EXT5]](s32), 32 +; CHECK: [[INS6:%[0-9]+]]:_(s96) = G_INSERT [[INS5]], [[EXT6]](s32), 64 +; CHECK: [[IMPDEF3:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF +; CHECK: [[INS7:%[0-9]+]]:_(s256) = G_INSERT [[IMPDEF3]], [[EXT7]](s64), 0 +; CHECK: [[INS8:%[0-9]+]]:_(s256) = G_INSERT [[INS7]], [[EXT8]](s64), 64 +; CHECK: [[INS9:%[0-9]+]]:_(s256) = G_INSERT [[INS8]], [[EXT9]](s64), 128 +; CHECK: [[INS10:%[0-9]+]]:_(s256) = G_INSERT [[INS9]], [[EXT10]](s64), 192 ; CHECK: ADJCALLSTACKDOWN 32, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[X0:%[0-9]+]]:_(s64), [[X1:%[0-9]+]]:_(s64), [[X2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[X_ARR]](s192) -; CHECK: [[Y0:%[0-9]+]]:_(s32), [[Y1:%[0-9]+]]:_(s32), [[Y2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[Y_ARR]](s96) -; CHECK: [[Z0:%[0-9]+]]:_(s64), [[Z1:%[0-9]+]]:_(s64), [[Z2:%[0-9]+]]:_(s64), [[Z3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[Z_ARR]](s256) +; CHECK: [[X0:%[0-9]+]]:_(s64), [[X1:%[0-9]+]]:_(s64), [[X2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[INS3]](s192) +; CHECK: [[Y0:%[0-9]+]]:_(s32), [[Y1:%[0-9]+]]:_(s32), 
[[Y2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS6]](s96) +; CHECK: [[Z0:%[0-9]+]]:_(s64), [[Z1:%[0-9]+]]:_(s64), [[Z2:%[0-9]+]]:_(s64), [[Z3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[INS10]](s256) ; CHECK: $d0 = COPY [[X0]](s64) ; CHECK: $d1 = COPY [[X1]](s64) ; CHECK: $d2 = COPY [[X2]](s64) @@ -391,7 +449,16 @@ ; CHECK: [[R3:%[0-9]+]]:_(s32) = COPY $s3 ; CHECK: [[R_MERGED:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32) ; CHECK: ADJCALLSTACKUP 32, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R_MERGED]](s128) +; CHECK: [[EXT11:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 0 +; CHECK: [[EXT12:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 32 +; CHECK: [[EXT13:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 64 +; CHECK: [[EXT14:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 96 +; CHECK: [[IMPDEF4:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF +; CHECK: [[INS11:%[0-9]+]]:_(s128) = G_INSERT [[IMPDEF4]], [[EXT11]](s32), 0 +; CHECK: [[INS12:%[0-9]+]]:_(s128) = G_INSERT [[INS11]], [[EXT12]](s32), 32 +; CHECK: [[INS13:%[0-9]+]]:_(s128) = G_INSERT [[INS12]], [[EXT13]](s32), 64 +; CHECK: [[INS14:%[0-9]+]]:_(s128) = G_INSERT [[INS13]], [[EXT14]](s32), 96 +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS14]](s128) ; CHECK: $s0 = COPY [[R0]] ; CHECK: $s1 = COPY [[R1]] ; CHECK: $s2 = COPY [[R2]] @@ -421,8 +488,9 @@ ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] ; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] ; CHECK: [[ARG_ARR:%[0-9]+]]:_(s768) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) +; CHECK: [[INS:%[0-9]+]]:_(s768) = G_INSERT {{.*}}, {{.*}}(s32), 736 ; CHECK: ADJCALLSTACKDOWN 80, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s768) +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS]](s768) ; CHECK: $r0 = COPY [[R0]] ; CHECK: $r1 = COPY [[R1]] ; CHECK: $r2 = COPY [[R2]] @@ -442,7 +510,12 @@ ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1 ; CHECK: [[RES_ARR:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) ; CHECK: ADJCALLSTACKUP 80, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[RES_ARR]](s64) +; CHECK: [[EXT1:%[0-9]+]]:_(p0) = G_EXTRACT [[RES_ARR]](s64), 0 +; CHECK: [[EXT2:%[0-9]+]]:_(p0) = G_EXTRACT [[RES_ARR]](s64), 32 +; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](p0), 0 +; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[INS2]], [[EXT2]](p0), 32 +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS3]](s64) ; CHECK: $r0 = COPY [[R0]] ; CHECK: $r1 = COPY [[R1]] ; CHECK: BX_RET 14, $noreg, implicit $r0, implicit $r1 @@ -459,8 +532,13 @@ ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY $r0 ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) 
= COPY $r1 ; CHECK: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) +; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[X]](s64), 0 +; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[X]](s64), 32 +; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[X0:%[0-9]+]]:_(s32), [[X1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[X]](s64) +; CHECK: [[X0:%[0-9]+]]:_(s32), [[X1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64) ; CHECK-DAG: $r0 = COPY [[X0]](s32) ; CHECK-DAG: $r1 = COPY [[X1]](s32) ; CHECK: BL @structs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $r0, implicit-def $r1 @@ -468,7 +546,12 @@ ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1 ; CHECK: [[R:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) ; CHECK: ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R]](s64) +; CHECK: [[EXT3:%[0-9]+]]:_(s32) = G_EXTRACT [[R]](s64), 0 +; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[R]](s64), 32 +; CHECK: [[IMPDEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[EXT3]](s32), 0 +; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[INS3]], [[EXT4]](s32), 32 +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS4]](s64) ; CHECK: $r0 = COPY [[R0]](s32) ; CHECK: $r1 = COPY [[R1]](s32) ; CHECK: BX_RET 14, $noreg, implicit $r0, implicit $r1
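; The array- and struct-forwarding tests above all check the same re-packed
; shape around a call: the value merged from the return registers is split
; with G_EXTRACTs, rebuilt from a G_IMPLICIT_DEF with G_INSERTs, and only then
; consumed by G_UNMERGE_VALUES. A minimal sketch of that pattern (hypothetical
; @returns_pair/@forward_pair, not tests from this patch):

declare arm_aapcscc [2 x i32] @returns_pair()

define arm_aapcscc [2 x i32] @forward_pair() {
  %pair = call arm_aapcscc [2 x i32] @returns_pair()
  ret [2 x i32] %pair
}

; After translation, the s64 produced by G_MERGE_VALUES from $r0/$r1 is taken
; apart by two G_EXTRACTs at offsets 0 and 32, reassembled via G_IMPLICIT_DEF
; and two G_INSERTs, and the G_INSERT result (not the original merge) feeds the
; final G_UNMERGE_VALUES, matching the EXT*/IMPDEF*/INS* sequences the updated
; CHECK lines verify.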