Index: llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h +++ llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h @@ -559,8 +559,7 @@ return Objects[ObjectIdx+NumFixedObjects].isAliased; } - /// isImmutableObjectIndex - Returns true if the specified index corresponds - /// to an immutable object. + /// Returns true if the specified index corresponds to an immutable object. bool isImmutableObjectIndex(int ObjectIdx) const { // Tail calling functions can clobber their function arguments. if (HasTailCall) @@ -570,6 +569,13 @@ return Objects[ObjectIdx+NumFixedObjects].isImmutable; } + /// Marks the immutability of an object. + void setIsImmutableObjectIndex(int ObjectIdx, bool Immutable) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + Objects[ObjectIdx+NumFixedObjects].isImmutable = Immutable; + } + /// Returns true if the specified index corresponds to a spill slot. bool isSpillSlotObjectIndex(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && Index: llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h @@ -54,6 +54,7 @@ const TargetInstrInfo *TII; const TargetLowering *TLI; bool FastISelFailed; + SmallPtrSet<const Instruction *, 4> ElidedArgCopyInstrs; static char ID; Index: llvm/trunk/include/llvm/Target/TargetCallingConv.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetCallingConv.h +++ llvm/trunk/include/llvm/Target/TargetCallingConv.h @@ -45,6 +45,7 @@ unsigned OrigAlign : 5; ///< Log 2 of original alignment unsigned IsInConsecutiveRegsLast : 1; unsigned IsInConsecutiveRegs : 1; + unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate unsigned ByValSize; ///< Byval struct size @@ -54,7 +55,8 @@ IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0), - IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), ByValSize(0) { + IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), + IsCopyElisionCandidate(0), ByValSize(0) { static_assert(sizeof(*this) == 2 * sizeof(unsigned), "flags are too big"); } @@ -109,6 +111,9 @@ bool isSplitEnd() const { return IsSplitEnd; } void setSplitEnd() { IsSplitEnd = 1; } + bool isCopyElisionCandidate() const { return IsCopyElisionCandidate; } + void setCopyElisionCandidate() { IsCopyElisionCandidate = 1; } + unsigned getByValAlign() const { return (1U << ByValAlign) / 2; } void setByValAlign(unsigned A) { ByValAlign = Log2_32(A) + 1; Index: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h =================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -89,7 +89,7 @@ assert(!MInsn && "Already initialized?"); assert((!E || E->isValid()) && "Expected valid expression"); - assert(~FI && "Expected valid index"); + assert(FI != INT_MAX && "Expected valid index"); FrameIndexExprs.push_back({FI, E}); } Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ 
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8028,6 +8028,173 @@ return true; } +typedef DenseMap<const Argument *, std::pair<const AllocaInst *, const StoreInst *>> + ArgCopyElisionMapTy; + +/// Scan the entry block of the function in FuncInfo for arguments that look +/// like copies into a local alloca. Record any copied arguments in +/// ArgCopyElisionCandidates. +static void +findArgumentCopyElisionCandidates(const DataLayout &DL, + FunctionLoweringInfo *FuncInfo, + ArgCopyElisionMapTy &ArgCopyElisionCandidates) { + // Record the state of every static alloca used in the entry block. Argument + // allocas are all used in the entry block, so we need approximately as many + // entries as we have arguments. + enum StaticAllocaInfo { Unknown, Clobbered, Elidable }; + SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas; + unsigned NumArgs = FuncInfo->Fn->getArgumentList().size(); + StaticAllocas.reserve(NumArgs * 2); + + auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * { + if (!V) + return nullptr; + V = V->stripPointerCasts(); + const auto *AI = dyn_cast<AllocaInst>(V); + if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI)) + return nullptr; + auto Iter = StaticAllocas.insert({AI, Unknown}); + return &Iter.first->second; + }; + + // Look for stores of arguments to static allocas. Look through bitcasts and + // GEPs to handle type coercions, as long as the alloca is fully initialized + // by the store. Any non-store use of an alloca escapes it and any subsequent + // unanalyzed store might write it. + // FIXME: Handle structs initialized with multiple stores. + for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) { + // Look for stores, and handle non-store uses conservatively. + const auto *SI = dyn_cast<StoreInst>(&I); + if (!SI) { + // We will look through cast uses, so ignore them completely. + if (I.isCast()) + continue; + // Ignore debug info intrinsics, they don't escape or store to allocas. + if (isa<DbgInfoIntrinsic>(I)) + continue; + // This is an unknown instruction. Assume it escapes or writes to all + // static alloca operands. + for (const Use &U : I.operands()) { + if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U)) + *Info = StaticAllocaInfo::Clobbered; + } + continue; + } + + // If the stored value is a static alloca, mark it as escaped. + if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand())) + *Info = StaticAllocaInfo::Clobbered; + + // Check if the destination is a static alloca. + const Value *Dst = SI->getPointerOperand()->stripPointerCasts(); + StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst); + if (!Info) + continue; + const AllocaInst *AI = cast<AllocaInst>(Dst); + + // Skip allocas that have been initialized or clobbered. + if (*Info != StaticAllocaInfo::Unknown) + continue; + + // Check if the stored value is an argument, and that this store fully + // initializes the alloca. Don't elide copies from the same argument twice. + const Value *Val = SI->getValueOperand()->stripPointerCasts(); + const auto *Arg = dyn_cast<Argument>(Val); + if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() || + Arg->getType()->isEmptyTy() || + DL.getTypeStoreSize(Arg->getType()) != + DL.getTypeAllocSize(AI->getAllocatedType()) || + ArgCopyElisionCandidates.count(Arg)) { + *Info = StaticAllocaInfo::Clobbered; + continue; + } + + DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n'); + + // Mark this alloca and store for argument copy elision. + *Info = StaticAllocaInfo::Elidable; + ArgCopyElisionCandidates.insert({Arg, {AI, SI}}); + + // Stop scanning if we've seen all arguments. 
This will happen early in -O0 + // builds, which is useful, because -O0 builds have large entry blocks and + // many allocas. + if (ArgCopyElisionCandidates.size() == NumArgs) + break; + } +} + +/// Try to elide argument copies from memory into a local alloca. Succeeds if +/// ArgVal is a load from a suitable fixed stack object. +static void tryToElideArgumentCopy( + FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains, + DenseMap<int, int> &ArgCopyElisionFrameIndexMap, + SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs, + ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg, + SDValue ArgVal, bool &ArgHasUses) { + // Check if this is a load from a fixed stack object. + auto *LNode = dyn_cast<LoadSDNode>(ArgVal); + if (!LNode) + return; + auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()); + if (!FINode) + return; + + // Check that the fixed stack object is the right size and alignment. + // Look at the alignment that the user wrote on the alloca instead of looking + // at the stack object. + auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg); + assert(ArgCopyIter != ArgCopyElisionCandidates.end()); + const AllocaInst *AI = ArgCopyIter->second.first; + int FixedIndex = FINode->getIndex(); + int &AllocaIndex = FuncInfo->StaticAllocaMap[AI]; + int OldIndex = AllocaIndex; + MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo(); + if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { + DEBUG(dbgs() << "  argument copy elision failed due to bad fixed stack " "object size\n"); + return; + } + unsigned RequiredAlignment = AI->getAlignment(); + if (!RequiredAlignment) { + RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment( + AI->getAllocatedType()); + } + if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { + DEBUG(dbgs() << "  argument copy elision failed: alignment of alloca " "greater than stack argument alignment (" + << RequiredAlignment << " vs " + << MFI.getObjectAlignment(FixedIndex) << ")\n"); + return; + } + + // Perform the elision. Delete the old stack object and replace its only use + // in the variable info map. Mark the stack object as mutable. + DEBUG({ + dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n' + << "  Replacing frame index " << OldIndex << " with " << FixedIndex + << '\n'; + }); + MFI.RemoveStackObject(OldIndex); + MFI.setIsImmutableObjectIndex(FixedIndex, false); + AllocaIndex = FixedIndex; + ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex}); + Chains.push_back(ArgVal.getValue(1)); + + // Avoid emitting code for the store implementing the copy. + const StoreInst *SI = ArgCopyIter->second.second; + ElidedArgCopyInstrs.insert(SI); + + // Check for uses of the argument again so that we can avoid exporting ArgVal + // if it isn't used by anything other than the store. + for (const Value *U : Arg.users()) { + if (U != SI) { + ArgHasUses = true; + break; + } + } +} + void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); @@ -8050,6 +8217,12 @@ Ins.push_back(RetArg); } + // Look for stores of arguments to static allocas. Mark such arguments with a + // flag to ask the target to give us the memory location of that argument if + // available. + ArgCopyElisionMapTy ArgCopyElisionCandidates; + findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates); + // Set up the incoming argument description vector. 
unsigned Idx = 0; for (const Argument &Arg : F.args()) { @@ -8127,6 +8300,8 @@ if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); + if (ArgCopyElisionCandidates.count(&Arg)) + Flags.setCopyElisionCandidate(); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); @@ -8199,19 +8374,33 @@ ++i; } + SmallVector<SDValue, 4> Chains; + DenseMap<int, int> ArgCopyElisionFrameIndexMap; for (const Argument &Arg : F.args()) { ++Idx; SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + continue; + + bool ArgHasUses = !Arg.use_empty(); + + // Elide the copying store if the target loaded this argument from a + // suitable fixed stack object. + if (Ins[i].Flags.isCopyElisionCandidate()) { + tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap, + ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg, + InVals[i], ArgHasUses); + } // If this argument is unused then remember its value. It is used to generate // debugging information. bool isSwiftErrorArg = TLI->supportSwiftError() && F.getAttributes().hasAttribute(Idx, Attribute::SwiftError); - if (Arg.use_empty() && NumValues && !isSwiftErrorArg) { + if (!ArgHasUses && !isSwiftErrorArg) { SDB->setUnusedArgValue(&Arg, InVals[i]); // Also remember any frame index for use in FastISel. @@ -8228,16 +8417,15 @@ // Even an apparant 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the // function. - if (!Arg.use_empty() || isSwiftErrorArg) { + if (ArgHasUses || isSwiftErrorArg) { Optional<ISD::NodeType> AssertOp; if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) AssertOp = ISD::AssertSext; else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) AssertOp = ISD::AssertZext; - ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], - NumParts, PartVT, VT, - nullptr, AssertOp)); + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, + PartVT, VT, nullptr, AssertOp)); } i += NumParts; @@ -8291,8 +8479,26 @@ } } + if (!Chains.empty()) { + Chains.push_back(NewRoot); + NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + } + + DAG.setRoot(NewRoot); + assert(i == InVals.size() && "Argument register count mismatch!"); + // If any argument copy elisions occurred and we have debug info, update the + // stale frame indices used in the dbg.declare variable info table. + MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo(); + if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) { + for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) { + auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot); + if (I != ArgCopyElisionFrameIndexMap.end()) + VI.Slot = I->second; + } + } + // Finally, if the target has anything special to do, allow it to do so. EmitFunctionEntryCode(); } Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -713,8 +713,10 @@ bool &HadTailCall) { // Lower the instructions. If a call is emitted as a tail call, cease emitting // nodes for this block. 
- for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) - SDB->visit(*I); + for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { + if (!ElidedArgCopyInstrs.count(&*I)) + SDB->visit(*I); + } // Make sure the root of the DAG is up-to-date. CurDAG->setRoot(SDB->getControlRoot()); @@ -1564,7 +1566,8 @@ const Instruction *Inst = &*std::prev(BI); // If we no longer require this instruction, skip it. - if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { + if (isFoldedOrDeadInstruction(Inst, FuncInfo) || + ElidedArgCopyInstrs.count(Inst)) { --NumFastIselRemaining; continue; } @@ -1694,6 +1697,7 @@ FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); + ElidedArgCopyInstrs.clear(); } propagateSwiftErrorVRegs(FuncInfo); Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -2691,6 +2691,7 @@ CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; + MVT PtrVT = getPointerTy(DAG.getDataLayout()); // If value is passed by pointer we have address passed instead of the value // itself. No need to extend if the mask value and location share the same @@ -2729,30 +2730,71 @@ if (CallConv == CallingConv::X86_INTR) { MFI.setObjectOffset(FI, Offset); } - return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - } else { - int FI = MFI.CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable); + return DAG.getFrameIndex(FI, PtrVT); + } - // Set SExt or ZExt flag. - if (VA.getLocInfo() == CCValAssign::ZExt) { - MFI.setObjectZExt(FI, true); - } else if (VA.getLocInfo() == CCValAssign::SExt) { - MFI.setObjectSExt(FI, true); + // This is an argument in memory. We might be able to perform copy elision. + if (Flags.isCopyElisionCandidate()) { + EVT ArgVT = Ins[i].ArgVT; + SDValue PartAddr; + if (Ins[i].PartOffset == 0) { + // If this is a one-part value or the first part of a multi-part value, + // create a stack object for the entire argument value type and return a + // load from our portion of it. This assumes that if the first part of an + // argument is in memory, the rest will also be in memory. + int FI = MFI.CreateFixedObject(ArgVT.getSizeInBits() / 8, + VA.getLocMemOffset(), /*Immutable=*/false); + PartAddr = DAG.getFrameIndex(FI, PtrVT); + return DAG.getLoad( + ValVT, dl, Chain, PartAddr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); + } else { + // This is not the first piece of an argument in memory. See if there is + // already a fixed stack object including this offset. If so, assume it + // was created by the PartOffset == 0 branch above and create a load from + // the appropriate offset into it. 
+ int64_t PartBegin = VA.getLocMemOffset(); + int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8; + int FI = MFI.getObjectIndexBegin(); + for (; MFI.isFixedObjectIndex(FI); ++FI) { + int64_t ObjBegin = MFI.getObjectOffset(FI); + int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI); + if (ObjBegin <= PartBegin && PartEnd <= ObjEnd) + break; + } + if (MFI.isFixedObjectIndex(FI)) { + SDValue Addr = + DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT), + DAG.getIntPtrConstant(Ins[i].PartOffset, dl)); + return DAG.getLoad( + ValVT, dl, Chain, Addr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI, + Ins[i].PartOffset)); + } } + } - // Adjust SP offset of interrupt parameter. - if (CallConv == CallingConv::X86_INTR) { - MFI.setObjectOffset(FI, Offset); - } + int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, + VA.getLocMemOffset(), isImmutable); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue Val = DAG.getLoad( - ValVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); - return ExtendedInMem ? - DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val; + // Set SExt or ZExt flag. + if (VA.getLocInfo() == CCValAssign::ZExt) { + MFI.setObjectZExt(FI, true); + } else if (VA.getLocInfo() == CCValAssign::SExt) { + MFI.setObjectSExt(FI, true); } + + // Adjust SP offset of interrupt parameter. + if (CallConv == CallingConv::X86_INTR) { + MFI.setObjectOffset(FI, Offset); + } + + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + SDValue Val = DAG.getLoad( + ValVT, dl, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); + return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) + : Val; } // FIXME: Get this from tablegen. Index: llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -3,7 +3,7 @@ ; rdar://13625505 ; Here we have 9 fixed integer arguments the 9th argument in on stack, the ; varargs start right after at 8-byte alignment. -define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp { +define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp { ; CHECK-LABEL: fn9: ; 9th fixed argument ; CHECK: ldr {{w[0-9]+}}, [sp, #64] @@ -30,7 +30,6 @@ %a10 = alloca i32, align 4 %a11 = alloca i32, align 4 %a12 = alloca i32, align 4 - store i32 %a1, i32* %1, align 4 store i32 %a2, i32* %2, align 4 store i32 %a3, i32* %3, align 4 store i32 %a4, i32* %4, align 4 @@ -39,6 +38,7 @@ store i32 %a7, i32* %7, align 4 store i32 %a8, i32* %8, align 4 store i32 %a9, i32* %9, align 4 + store i32 %a9, i32* %a1 %10 = bitcast i8** %args to i8* call void @llvm.va_start(i8* %10) %11 = va_arg i8** %args, i32 @@ -93,7 +93,7 @@ %10 = load i32, i32* %a10, align 4 %11 = load i32, i32* %a11, align 4 %12 = load i32, i32* %a12, align 4 - call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12) + call void (i32*, i32, i32, i32, i32, i32, i32, i32, i32, ...) 
@fn9(i32* %a1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12) ret i32 0 } Index: llvm/trunk/test/CodeGen/ARM/arg-copy-elide.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arg-copy-elide.ll +++ llvm/trunk/test/CodeGen/ARM/arg-copy-elide.ll @@ -0,0 +1,61 @@ +; RUN: llc -mtriple=armv7-linux < %s | FileCheck %s + +declare arm_aapcscc void @addrof_i32(i32*) +declare arm_aapcscc void @addrof_i64(i64*) + +define arm_aapcscc void @simple(i32, i32, i32, i32, i32 %x) { +entry: + %x.addr = alloca i32 + store i32 %x, i32* %x.addr + call void @addrof_i32(i32* %x.addr) + ret void +} + +; CHECK-LABEL: simple: +; CHECK: push {r11, lr} +; CHECK: add r0, sp, #8 +; CHECK: bl addrof_i32 +; CHECK: pop {r11, pc} + + +; We need to load %x before calling addrof_i32 now because it could mutate %x in +; place. + +define arm_aapcscc i32 @use_arg(i32, i32, i32, i32, i32 %x) { +entry: + %x.addr = alloca i32 + store i32 %x, i32* %x.addr + call void @addrof_i32(i32* %x.addr) + ret i32 %x +} + +; CHECK-LABEL: use_arg: +; CHECK: push {[[csr:[^ ]*]], lr} +; CHECK: ldr [[csr]], [sp, #8] +; CHECK: add r0, sp, #8 +; CHECK: bl addrof_i32 +; CHECK: mov r0, [[csr]] +; CHECK: pop {[[csr]], pc} + + +define arm_aapcscc i64 @split_i64(i32, i32, i32, i32, i64 %x) { +entry: + %x.addr = alloca i64, align 4 + store i64 %x, i64* %x.addr, align 4 + call void @addrof_i64(i64* %x.addr) + ret i64 %x +} + +; CHECK-LABEL: split_i64: +; CHECK: push {r4, r5, r11, lr} +; CHECK: sub sp, sp, #8 +; CHECK: ldr r4, [sp, #28] +; CHECK: ldr r5, [sp, #24] +; CHECK: mov r0, sp +; CHECK: str r4, [sp, #4] +; CHECK: str r5, [sp] +; CHECK: bl addrof_i64 +; CHECK: mov r0, r5 +; CHECK: mov r1, r4 +; CHECK: add sp, sp, #8 +; CHECK: pop {r4, r5, r11, pc} Index: llvm/trunk/test/CodeGen/Mips/o32_cc_vararg.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/o32_cc_vararg.ll +++ llvm/trunk/test/CodeGen/Mips/o32_cc_vararg.ll @@ -236,8 +236,8 @@ ret i32 %tmp ; CHECK-LABEL: va9: -; CHECK: addiu $sp, $sp, -32 -; CHECK: lw $2, 52($sp) +; CHECK: addiu $sp, $sp, -24 +; CHECK: lw $2, 44($sp) } ; double Index: llvm/trunk/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll +++ llvm/trunk/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll @@ -8,9 +8,10 @@ @.str = internal constant [4 x i8] c"%p\0A\00" ; <[4 x i8]*> [#uses=1] @llvm.used = appending global [1 x i8*] [i8* bitcast (i8* (%struct.S*, i32, %struct.S*)* @_Z4test1SiS_ to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] -; Verify that %esi gets spilled before the call. +; Verify that %s1 gets spilled before the call. 
; CHECK: Z4test1SiS -; CHECK: movl %esi,{{.*}}(%ebp) +; CHECK: leal 8(%ebp), %[[reg:[^ ]*]] +; CHECK: movl %[[reg]],{{.*}}(%ebp) ## 4-byte Spill ; CHECK: calll __Z6throwsv define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { Index: llvm/trunk/test/CodeGen/X86/arg-copy-elide.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/arg-copy-elide.ll +++ llvm/trunk/test/CodeGen/X86/arg-copy-elide.ll @@ -0,0 +1,280 @@ +; RUN: llc -mtriple=i686-windows < %s | FileCheck %s + +declare void @addrof_i32(i32*) +declare void @addrof_i64(i64*) +declare void @addrof_i128(i128*) +declare void @addrof_i32_x3(i32*, i32*, i32*) + +define void @simple(i32 %x) { +entry: + %x.addr = alloca i32 + store i32 %x, i32* %x.addr + call void @addrof_i32(i32* %x.addr) + ret void +} + +; CHECK-LABEL: _simple: +; CHECK: leal 4(%esp), %[[reg:[^ ]*]] +; CHECK: pushl %[[reg]] +; CHECK: calll _addrof_i32 +; CHECK: retl + + +; We need to load %x before calling addrof_i32 now because it could mutate %x in +; place. + +define i32 @use_arg(i32 %x) { +entry: + %x.addr = alloca i32 + store i32 %x, i32* %x.addr + call void @addrof_i32(i32* %x.addr) + ret i32 %x +} + +; CHECK-LABEL: _use_arg: +; CHECK: pushl %[[csr:[^ ]*]] +; CHECK-DAG: movl 8(%esp), %[[csr]] +; CHECK-DAG: leal 8(%esp), %[[reg:[^ ]*]] +; CHECK: pushl %[[reg]] +; CHECK: calll _addrof_i32 +; CHECK: movl %[[csr]], %eax +; CHECK: popl %[[csr]] +; CHECK: retl + + +define i64 @split_i64(i64 %x) { +entry: + %x.addr = alloca i64, align 4 + store i64 %x, i64* %x.addr, align 4 + call void @addrof_i64(i64* %x.addr) + ret i64 %x +} + +; CHECK-LABEL: _split_i64: +; CHECK: pushl %ebp +; CHECK: movl %esp, %ebp +; CHECK: pushl %[[csr2:[^ ]*]] +; CHECK: pushl %[[csr1:[^ ]*]] +; CHECK: andl $-8, %esp +; CHECK-DAG: movl 8(%ebp), %[[csr1]] +; CHECK-DAG: movl 12(%ebp), %[[csr2]] +; CHECK-DAG: leal 8(%ebp), %[[reg:[^ ]*]] +; CHECK: pushl %[[reg]] +; CHECK: calll _addrof_i64 +; CHECK-DAG: movl %[[csr1]], %eax +; CHECK-DAG: movl %[[csr2]], %edx +; CHECK: leal -8(%ebp), %esp +; CHECK: popl %[[csr1]] +; CHECK: popl %[[csr2]] +; CHECK: popl %ebp +; CHECK: retl + + +; We can't copy elide when an i64 is split between registers and memory in a +; fastcc function. + +define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) { +entry: + %x.addr = alloca i64, align 4 + store i64 %x, i64* %x.addr, align 4 + call void @addrof_i64(i64* %x.addr) + ret i64 %x +} + +; CHECK-LABEL: _fastcc_split_i64: +; CHECK: pushl %ebp +; CHECK: movl %esp, %ebp +; CHECK-DAG: movl %edx, %[[r1:[^ ]*]] +; CHECK-DAG: movl 8(%ebp), %[[r2:[^ ]*]] +; CHECK-DAG: movl %[[r2]], 4(%esp) +; CHECK-DAG: movl %[[r1]], (%esp) +; CHECK: movl %esp, %[[reg:[^ ]*]] +; CHECK: pushl %[[reg]] +; CHECK: calll _addrof_i64 +; CHECK: popl %ebp +; CHECK: retl + + +; We can't copy elide when it would reduce the user requested alignment. + +define void @high_alignment(i32 %x) { +entry: + %x.p = alloca i32, align 128 + store i32 %x, i32* %x.p + call void @addrof_i32(i32* %x.p) + ret void +} + +; CHECK-LABEL: _high_alignment: +; CHECK: andl $-128, %esp +; CHECK: movl 8(%ebp), %[[reg:[^ ]*]] +; CHECK: movl %[[reg]], (%esp) +; CHECK: movl %esp, %[[reg:[^ ]*]] +; CHECK: pushl %[[reg]] +; CHECK: calll _addrof_i32 +; CHECK: retl + + +; We can't copy elide when it would reduce the ABI required alignment. +; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC +; doesn't guarantee it. 
+ +define void @abi_alignment(i64 %x) { +entry: + %x.p = alloca i64 + store i64 %x, i64* %x.p + call void @addrof_i64(i64* %x.p) + ret void +} + +; CHECK-LABEL: _abi_alignment: +; CHECK: andl $-8, %esp +; CHECK: movl 8(%ebp), %[[reg:[^ ]*]] +; CHECK: movl %[[reg]], (%esp) +; CHECK: movl %esp, %[[reg:[^ ]*]] +; CHECK: pushl %[[reg]] +; CHECK: calll _addrof_i64 +; CHECK: retl + + +; The code we generate for this is unimportant. This is mostly a crash test. + +define void @split_i128(i128* %sret, i128 %x) { +entry: + %x.addr = alloca i128 + store i128 %x, i128* %x.addr + call void @addrof_i128(i128* %x.addr) + store i128 %x, i128* %sret + ret void +} + +; CHECK-LABEL: _split_i128: +; CHECK: pushl %ebp +; CHECK: calll _addrof_i128 +; CHECK: retl + + +; Check that we load all of x, y, and z before the call. + +define i32 @three_args(i32 %x, i32 %y, i32 %z) { +entry: + %z.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %z, i32* %z.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + store i32 %x, i32* %x.addr, align 4 + call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr) + %s1 = add i32 %x, %y + %sum = add i32 %s1, %z + ret i32 %sum +} + +; CHECK-LABEL: _three_args: +; CHECK: pushl %[[csr:[^ ]*]] +; CHECK-DAG: movl {{[0-9]+}}(%esp), %[[csr]] +; CHECK-DAG: addl {{[0-9]+}}(%esp), %[[csr]] +; CHECK-DAG: addl {{[0-9]+}}(%esp), %[[csr]] +; CHECK-DAG: leal 8(%esp), %[[x:[^ ]*]] +; CHECK-DAG: leal 12(%esp), %[[y:[^ ]*]] +; CHECK-DAG: leal 16(%esp), %[[z:[^ ]*]] +; CHECK: pushl %[[z]] +; CHECK: pushl %[[y]] +; CHECK: pushl %[[x]] +; CHECK: calll _addrof_i32_x3 +; CHECK: movl %[[csr]], %eax +; CHECK: popl %[[csr]] +; CHECK: retl + + +define void @two_args_same_alloca(i32 %x, i32 %y) { +entry: + %x.addr = alloca i32 + store i32 %x, i32* %x.addr + store i32 %y, i32* %x.addr + call void @addrof_i32(i32* %x.addr) + ret void +} + +; CHECK-LABEL: _two_args_same_alloca: +; CHECK: movl 8(%esp), {{.*}} +; CHECK: movl {{.*}}, 4(%esp) +; CHECK: leal 4(%esp), %[[reg:[^ ]*]] +; CHECK: pushl %[[reg]] +; CHECK: calll _addrof_i32 +; CHECK: retl + + +define void @avoid_byval(i32* byval %x) { +entry: + %x.p.p = alloca i32* + store i32* %x, i32** %x.p.p + call void @addrof_i32(i32* %x) + ret void +} + +; CHECK-LABEL: _avoid_byval: +; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]] +; CHECK: pushl %[[reg]] +; CHECK: calll _addrof_i32 +; CHECK: retl + + +define void @avoid_inalloca(i32* inalloca %x) { +entry: + %x.p.p = alloca i32* + store i32* %x, i32** %x.p.p + call void @addrof_i32(i32* %x) + ret void +} + +; CHECK-LABEL: _avoid_inalloca: +; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]] +; CHECK: pushl %[[reg]] +; CHECK: calll _addrof_i32 +; CHECK: retl + + +; Don't elide the copy when the alloca is escaped with a store. + +define void @escape_with_store(i32 %x) { + %x1 = alloca i32 + %x2 = alloca i32* + store i32* %x1, i32** %x2 + %x3 = load i32*, i32** %x2 + store i32 0, i32* %x3 + store i32 %x, i32* %x1 + call void @addrof_i32(i32* %x1) + ret void +} + +; CHECK-LABEL: _escape_with_store: +; CHECK-DAG: movl {{.*}}(%esp), %[[reg:[^ ]*]] +; CHECK-DAG: movl $0, [[offs:[0-9]*]](%esp) +; CHECK: movl %[[reg]], [[offs]](%esp) +; CHECK: calll _addrof_i32 + + +; This test case exposed issues with the use of TokenFactor. 
+ +define void @sret_and_elide(i32* sret %sret, i32 %v) { + %v.p = alloca i32 + store i32 %v, i32* %v.p + call void @addrof_i32(i32* %v.p) + store i32 %v, i32* %sret + ret void +} + +; CHECK-LABEL: _sret_and_elide: +; CHECK: pushl +; CHECK: pushl +; CHECK: movl 12(%esp), %[[sret:[^ ]*]] +; CHECK: movl 16(%esp), %[[v:[^ ]*]] +; CHECK: leal 16(%esp), %[[reg:[^ ]*]] +; CHECK: pushl %[[reg]] +; CHECK: calll _addrof_i32 +; CHECK: movl %[[v]], (%[[sret]]) +; CHECK: movl %[[sret]], %eax +; CHECK: popl +; CHECK: popl +; CHECK: retl Index: llvm/trunk/test/CodeGen/X86/inline-asm-tied.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inline-asm-tied.ll +++ llvm/trunk/test/CodeGen/X86/inline-asm-tied.ll @@ -1,31 +1,27 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -optimize-regalloc -regalloc=basic -no-integrated-as | FileCheck %s ; rdar://6992609 -; CHECK: movl %ecx, 4([[ESP:%e..]]) -; CHECK: movl 4([[ESP]]), [[EDX:%e..]] -; CHECK: movl [[EDX]], 4([[ESP]]) target triple = "i386-apple-darwin9.0" -@llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] define i64 @_OSSwapInt64(i64 %_data) nounwind { entry: - %retval = alloca i64 ; [#uses=2] - %_data.addr = alloca i64 ; [#uses=4] - store i64 %_data, i64* %_data.addr - %tmp = load i64, i64* %_data.addr ; [#uses=1] - %0 = call i64 asm "bswap %eax\0A\09bswap %edx\0A\09xchgl %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %tmp) nounwind ; [#uses=1] - store i64 %0, i64* %_data.addr - %tmp1 = load i64, i64* %_data.addr ; [#uses=1] - store i64 %tmp1, i64* %retval - %1 = load i64, i64* %retval ; [#uses=1] - ret i64 %1 + %0 = call i64 asm "bswap %eax\0A\09bswap %edx\0A\09xchgl %eax, %%edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %_data) nounwind + ret i64 %0 } +; CHECK-LABEL: __OSSwapInt64: +; CHECK-DAG: movl 8(%esp), %edx +; CHECK-DAG: movl 4(%esp), %eax +; CHECK: ## InlineAsm Start +; CHECK: ## InlineAsm End +; Everything is set up in EAX:EDX, return immediately. +; CHECK-NEXT: retl + ; The tied operands are not necessarily in the same order as the defs. 
; PR13742 define i64 @swapped(i64 %x, i64 %y) nounwind { entry: - %x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) nounwind - %x1 = extractvalue { i64, i64 } %x0, 0 - ret i64 %x1 + %x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) nounwind + %x1 = extractvalue { i64, i64 } %x0, 0 + ret i64 %x1 } Index: llvm/trunk/test/CodeGen/X86/pr30430.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr30430.ll +++ llvm/trunk/test/CodeGen/X86/pr30430.ll @@ -30,14 +30,6 @@ ; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm8, (%rsp) ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero @@ -46,14 +38,14 @@ ; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp) @@ -62,14 +54,14 @@ ; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm16, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm17, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm18, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm19, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm20, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm21, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm22, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; 
CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] @@ -104,11 +96,19 @@ ; CHECK-NEXT: # implicit-def: %YMM3 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3 -; CHECK-NEXT: # implicit-def: %ZMM16 -; CHECK-NEXT: vmovaps %zmm3, %zmm16 -; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm16, %zmm16 -; CHECK-NEXT: vmovaps %zmm16, {{[0-9]+}}(%rsp) +; CHECK-NEXT: # implicit-def: %ZMM24 +; CHECK-NEXT: vmovaps %zmm3, %zmm24 +; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24 +; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0 +; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -1653,12 +1653,8 @@ define void @test_mm_setcsr(i32 %a0) nounwind { ; X32-LABEL: test_mm_setcsr: ; X32: # BB#0: -; X32-NEXT: pushl %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %esp, %ecx -; X32-NEXT: movl %eax, (%esp) -; X32-NEXT: ldmxcsr (%ecx) -; X32-NEXT: popl %eax +; X32-NEXT: leal 4(%esp), %eax +; X32-NEXT: ldmxcsr (%eax) ; X32-NEXT: retl ; ; X64-LABEL: test_mm_setcsr: Index: llvm/trunk/test/DebugInfo/X86/discriminator.ll =================================================================== --- llvm/trunk/test/DebugInfo/X86/discriminator.ll +++ llvm/trunk/test/DebugInfo/X86/discriminator.ll @@ -59,4 +59,4 @@ ; CHECK: Address Line Column File ISA Discriminator Flags ; CHECK: ------------------ ------ ------ ------ --- ------------- ------------- -; CHECK: 0x0000000000000011 2 0 1 0 42 {{$}} +; CHECK: 0x000000000000000a 2 0 1 0 42 {{$}}
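
Note (illustration only, not part of the commit): the new ArgFlagsTy bit is just a request from SelectionDAGBuilder to the target. A backend opts in by returning, for a flagged in-memory argument, a load from a *mutable* fixed stack object covering the whole argument, as the X86 hunk above does; tryToElideArgumentCopy then recognizes the load-from-FrameIndex pattern, points the user's alloca at that fixed slot, and drops the copying store. The sketch below shows that shape as a standalone helper; the function name lowerMemArgWithElisionHint and its parameter list are invented for this illustration and do not exist in the tree.

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetCallingConv.h"

using namespace llvm;

// Hypothetical sketch of a target-side helper for lowering one in-memory
// formal argument while honoring the IsCopyElisionCandidate hint. Real
// targets fold this logic into LowerFormalArguments / LowerMemArgument.
static SDValue lowerMemArgWithElisionHint(SelectionDAG &DAG, const SDLoc &dl,
                                          SDValue Chain,
                                          const ISD::InputArg &In,
                                          const CCValAssign &VA, MVT PtrVT) {
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  EVT ValVT = VA.getValVT();

  if (In.Flags.isCopyElisionCandidate() && In.PartOffset == 0) {
    // Copy elision requested: create a *mutable* fixed object covering the
    // entire argument value and return a load from it. SelectionDAGBuilder's
    // tryToElideArgumentCopy looks for exactly this load-from-FrameIndex
    // pattern and, if the size and alignment checks pass, redirects the
    // user's alloca to this slot and skips the copying store.
    int FI = MFI.CreateFixedObject(In.ArgVT.getSizeInBits() / 8,
                                   VA.getLocMemOffset(), /*Immutable=*/false);
    SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
    return DAG.getLoad(
        ValVT, dl, Chain, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
  }

  // Default path: an immutable fixed object sized for this value only.
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), /*Immutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  return DAG.getLoad(
      ValVT, dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
}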