Index: llvm/trunk/include/llvm/CodeGen/FastISel.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/FastISel.h +++ llvm/trunk/include/llvm/CodeGen/FastISel.h @@ -241,9 +241,12 @@ } /// \brief Set the current block to which generated machine instructions will - /// be appended, and clear the local CSE map. + /// be appended. void startNewBlock(); + /// Flush the local value map and sink local values if possible. + void finishBasicBlock(); + /// \brief Return current debug location information. DebugLoc getCurDebugLoc() const { return DbgLoc; } @@ -560,6 +563,19 @@ /// \brief Removes dead local value instructions after SavedLastLocalvalue. void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue); + struct InstOrderMap { + DenseMap<MachineInstr *, unsigned> Orders; + MachineInstr *FirstTerminator = nullptr; + unsigned FirstTerminatorOrder = std::numeric_limits<unsigned>::max(); + + void initialize(MachineBasicBlock *MBB); + }; + + /// Sinks the local value materialization instruction LocalMI to its first use + /// in the basic block, or deletes it if it is not used. + void sinkLocalValueMaterialization(MachineInstr &LocalMI, unsigned DefReg, + InstOrderMap &OrderMap); + /// \brief Insertion point before trying to select the current instruction. MachineBasicBlock::iterator SavedInsertPt; Index: llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h +++ llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -178,6 +178,8 @@ /// RegFixups - Registers which need to be replaced after isel is done. DenseMap<unsigned, unsigned> RegFixups; + DenseSet<unsigned> RegsWithFixups; + /// StatepointStackSlots - A list of temporary stack slots (frame indices) /// used to spill values at a statepoint. 
We store them here to enable /// reuse of the same stack slots across different statepoints in different Index: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -120,9 +120,10 @@ STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); /// Set the current block to which generated machine instructions will be -/// appended, and clear the local CSE map. +/// appended. void FastISel::startNewBlock() { - LocalValueMap.clear(); + assert(LocalValueMap.empty() && + "local values should be cleared after finishing a BB"); // Instructions are appended to FuncInfo.MBB. If the basic block already // contains labels or copies, use the last instruction as the last local @@ -133,6 +134,9 @@ LastLocalValue = EmitStartPt; } +/// Flush the local CSE map and sink anything we can. +void FastISel::finishBasicBlock() { flushLocalValueMap(); } + bool FastISel::lowerArguments() { if (!FuncInfo.CanLowerReturn) // Fallback to SDISel argument lowering code to deal with sret pointer @@ -153,13 +157,160 @@ return true; } +/// Return the defined register if this instruction defines exactly one +/// virtual register and uses no other virtual registers. Otherwise return 0. +static unsigned findSinkableLocalRegDef(MachineInstr &MI) { + unsigned RegDef = 0; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef()) { + if (RegDef) + return 0; + RegDef = MO.getReg(); + } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + // This is another use of a vreg. Don't try to sink it. + return 0; + } + } + return RegDef; +} + void FastISel::flushLocalValueMap() { + // Try to sink local values down to their first use so that we can give them a + // better debug location. This has the side effect of shrinking local value + // live ranges, which helps out fast regalloc. 
+ if (LastLocalValue != EmitStartPt) { + // Sink local value materialization instructions between EmitStartPt and + // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to + // avoid inserting into the range that we're iterating over. + MachineBasicBlock::reverse_iterator RE = + EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt) + : FuncInfo.MBB->rend(); + MachineBasicBlock::reverse_iterator RI(LastLocalValue); + + InstOrderMap OrderMap; + for (; RI != RE;) { + MachineInstr &LocalMI = *RI; + ++RI; + bool Store = true; + if (!LocalMI.isSafeToMove(nullptr, Store)) + continue; + unsigned DefReg = findSinkableLocalRegDef(LocalMI); + if (DefReg == 0) + continue; + + sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap); + } + } + LocalValueMap.clear(); LastLocalValue = EmitStartPt; recomputeInsertPt(); SavedInsertPt = FuncInfo.InsertPt; } +static bool isRegUsedByPhiNodes(unsigned DefReg, + FunctionLoweringInfo &FuncInfo) { + for (auto &P : FuncInfo.PHINodesToUpdate) + if (P.second == DefReg) + return true; + return false; +} + +/// Build a map of instruction orders. Return the first terminator and its +/// order. Consider EH_LABEL instructions to be terminators as well, since local +/// values for phis after invokes must be materialized before the call. +void FastISel::InstOrderMap::initialize(MachineBasicBlock *MBB) { + unsigned Order = 0; + for (MachineInstr &I : *MBB) { + if (!FirstTerminator && + (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) { + FirstTerminator = &I; + FirstTerminatorOrder = Order; + } + Orders[&I] = Order++; + } +} + +void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI, + unsigned DefReg, + InstOrderMap &OrderMap) { + // If this register is used by a register fixup, MRI will not contain all + // the uses until after register fixups, so don't attempt to sink or DCE + // this instruction. 
Register fixups typically come from no-op cast + // instructions, which replace the cast instruction vreg with the local + // value vreg. + if (FuncInfo.RegsWithFixups.count(DefReg)) + return; + + // We can DCE this instruction if there are no uses and it wasn't + // materialized for a successor PHI node. + bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo); + if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) { + if (EmitStartPt == &LocalMI) + EmitStartPt = EmitStartPt->getPrevNode(); + DEBUG(dbgs() << "removing dead local value materialization " << LocalMI); + OrderMap.Orders.erase(&LocalMI); + LocalMI.eraseFromParent(); + return; + } + + // Number the instructions if we haven't yet so we can efficiently find the + // earliest use. + if (OrderMap.Orders.empty()) + OrderMap.initialize(FuncInfo.MBB); + + // Find the first user in the BB. + MachineInstr *FirstUser = nullptr; + unsigned FirstOrder = std::numeric_limits<unsigned>::max(); + for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) { + unsigned UseOrder = OrderMap.Orders[&UseInst]; + if (UseOrder < FirstOrder) { + FirstOrder = UseOrder; + FirstUser = &UseInst; + } + } + + // The insertion point will be the first terminator or the first user, + // whichever came first. If there was no terminator, this must be a + // fallthrough block and the insertion point is the end of the block. + MachineBasicBlock::instr_iterator SinkPos; + if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) { + FirstOrder = OrderMap.FirstTerminatorOrder; + SinkPos = OrderMap.FirstTerminator->getIterator(); + } else if (FirstUser) { + SinkPos = FirstUser->getIterator(); + } else { + assert(UsedByPHI && "must be users if not used by a phi"); + SinkPos = FuncInfo.MBB->instr_end(); + } + + // Collect all DBG_VALUEs before the new insertion position so that we can + // sink them. 
+ SmallVector DbgValues; + for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) { + if (!DbgVal.isDebugValue()) + continue; + unsigned UseOrder = OrderMap.Orders[&DbgVal]; + if (UseOrder < FirstOrder) + DbgValues.push_back(&DbgVal); + } + + // Sink LocalMI before SinkPos and assign it the same DebugLoc. + DEBUG(dbgs() << "sinking local value to first use " << LocalMI); + FuncInfo.MBB->remove(&LocalMI); + FuncInfo.MBB->insert(SinkPos, &LocalMI); + if (SinkPos != FuncInfo.MBB->end()) + LocalMI.setDebugLoc(SinkPos->getDebugLoc()); + + // Sink any debug values that we've collected. + for (MachineInstr *DI : DbgValues) { + FuncInfo.MBB->remove(DI); + FuncInfo.MBB->insert(SinkPos, DI); + } +} + bool FastISel::hasTrivialKill(const Value *V) { // Don't consider constants or arguments to have trivial kills. const Instruction *I = dyn_cast(V); @@ -328,8 +479,10 @@ AssignedReg = Reg; else if (Reg != AssignedReg) { // Arrange for uses of AssignedReg to be replaced by uses of Reg. - for (unsigned i = 0; i < NumRegs; i++) + for (unsigned i = 0; i < NumRegs; i++) { FuncInfo.RegFixups[AssignedReg + i] = Reg + i; + FuncInfo.RegsWithFixups.insert(Reg + i); + } AssignedReg = Reg; } Index: llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -318,6 +318,7 @@ ArgDbgValues.clear(); ByValArgFrameIndexMap.clear(); RegFixups.clear(); + RegsWithFixups.clear(); StatepointStackSlots.clear(); StatepointSpillMaps.clear(); PreferredExtendType.clear(); Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1658,6 +1658,8 @@ FastIS->removeDeadCode(FuncInfo->InsertPt, 
FuncInfo->MBB->end()); } + if (FastIS) + FastIS->finishBasicBlock(); FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); ElidedArgCopyInstrs.clear(); Index: llvm/trunk/test/CodeGen/AArch64/arm64-abi_align.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-abi_align.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-abi_align.ll @@ -290,13 +290,14 @@ ; Space for s2 is allocated at sp ; FAST-LABEL: caller42 -; FAST: sub sp, sp, #112 -; Space for s1 is allocated at fp-24 = sp+72 -; Space for s2 is allocated at sp+48 +; FAST: sub sp, sp, #96 +; Space for s1 is allocated at fp-24 = sp+56 ; FAST: sub x[[A:[0-9]+]], x29, #24 -; FAST: add x[[A:[0-9]+]], sp, #48 ; Call memcpy with size = 24 (0x18) ; FAST: orr {{x[0-9]+}}, xzr, #0x18 +; Space for s2 is allocated at sp+32 +; FAST: add x[[A:[0-9]+]], sp, #32 +; FAST: bl _memcpy %tmp = alloca %struct.s42, align 4 %tmp1 = alloca %struct.s42, align 4 %0 = bitcast %struct.s42* %tmp to i8* @@ -334,13 +335,16 @@ ; FAST-LABEL: caller42_stack ; Space for s1 is allocated at fp-24 -; Space for s2 is allocated at fp-48 ; FAST: sub x[[A:[0-9]+]], x29, #24 -; FAST: sub x[[B:[0-9]+]], x29, #48 ; Call memcpy with size = 24 (0x18) ; FAST: orr {{x[0-9]+}}, xzr, #0x18 -; FAST: str {{w[0-9]+}}, [sp] +; FAST: bl _memcpy +; Space for s2 is allocated at fp-48 +; FAST: sub x[[B:[0-9]+]], x29, #48 +; Call memcpy again +; FAST: bl _memcpy ; Address of s1 is passed on stack at sp+8 +; FAST: str {{w[0-9]+}}, [sp] ; FAST: str {{x[0-9]+}}, [sp, #8] ; FAST: str {{x[0-9]+}}, [sp, #16] %tmp = alloca %struct.s42, align 4 @@ -401,8 +405,6 @@ ; FAST: add x29, sp, #64 ; Space for s1 is allocated at sp+32 ; Space for s2 is allocated at sp -; FAST: add x1, sp, #32 -; FAST: mov x2, sp ; FAST: str {{x[0-9]+}}, [sp, #32] ; FAST: str {{x[0-9]+}}, [sp, #40] ; FAST: str {{x[0-9]+}}, [sp, #48] @@ -411,6 +413,8 @@ ; FAST: str {{x[0-9]+}}, [sp, #8] ; FAST: str {{x[0-9]+}}, [sp, #16] ; FAST: str {{x[0-9]+}}, [sp, 
#24] +; FAST: add x1, sp, #32 +; FAST: mov x2, sp %tmp = alloca %struct.s43, align 16 %tmp1 = alloca %struct.s43, align 16 %0 = bitcast %struct.s43* %tmp to i8* @@ -448,8 +452,6 @@ ; FAST: sub sp, sp, #112 ; Space for s1 is allocated at fp-32 = sp+64 ; Space for s2 is allocated at sp+32 -; FAST: sub x[[A:[0-9]+]], x29, #32 -; FAST: add x[[B:[0-9]+]], sp, #32 ; FAST: stur {{x[0-9]+}}, [x29, #-32] ; FAST: stur {{x[0-9]+}}, [x29, #-24] ; FAST: stur {{x[0-9]+}}, [x29, #-16] @@ -460,8 +462,10 @@ ; FAST: str {{x[0-9]+}}, [sp, #56] ; FAST: str {{w[0-9]+}}, [sp] ; Address of s1 is passed on stack at sp+8 -; FAST: str {{x[0-9]+}}, [sp, #8] -; FAST: str {{x[0-9]+}}, [sp, #16] +; FAST: sub x[[A:[0-9]+]], x29, #32 +; FAST: str x[[A]], [sp, #8] +; FAST: add x[[B:[0-9]+]], sp, #32 +; FAST: str x[[B]], [sp, #16] %tmp = alloca %struct.s43, align 16 %tmp1 = alloca %struct.s43, align 16 %0 = bitcast %struct.s43* %tmp to i8* Index: llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll @@ -80,15 +80,15 @@ entry: ; CHECK-LABEL: t2 ; CHECK: mov [[REG1:x[0-9]+]], xzr +; CHECK: mov x0, [[REG1]] ; CHECK: orr w1, wzr, #0xfffffff8 ; CHECK: orr [[REG2:w[0-9]+]], wzr, #0x3ff -; CHECK: orr [[REG3:w[0-9]+]], wzr, #0x2 -; CHECK: mov [[REG4:w[0-9]+]], wzr -; CHECK: orr [[REG5:w[0-9]+]], wzr, #0x1 -; CHECK: mov x0, [[REG1]] ; CHECK: uxth w2, [[REG2]] +; CHECK: orr [[REG3:w[0-9]+]], wzr, #0x2 ; CHECK: sxtb w3, [[REG3]] +; CHECK: mov [[REG4:w[0-9]+]], wzr ; CHECK: and w4, [[REG4]], #0x1 +; CHECK: orr [[REG5:w[0-9]+]], wzr, #0x1 ; CHECK: and w5, [[REG5]], #0x1 ; CHECK: bl _func2 %call = call i32 @func2(i64 zeroext 0, i32 signext -8, i16 zeroext 1023, i8 signext -254, i1 zeroext 0, i1 zeroext 1) Index: llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-gv.ll 
=================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-gv.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-gv.ll @@ -18,10 +18,10 @@ ; CHECK: @Rand ; CHECK: adrp [[REG1:x[0-9]+]], _seed@GOTPAGE ; CHECK: ldr [[REG2:x[0-9]+]], {{\[}}[[REG1]], _seed@GOTPAGEOFF{{\]}} -; CHECK: mov [[REG3:x[0-9]+]], #13849 -; CHECK: mov [[REG4:x[0-9]+]], #1309 ; CHECK: ldr [[REG5:x[0-9]+]], {{\[}}[[REG2]]{{\]}} +; CHECK: mov [[REG4:x[0-9]+]], #1309 ; CHECK: mul [[REG6:x[0-9]+]], [[REG5]], [[REG4]] +; CHECK: mov [[REG3:x[0-9]+]], #13849 ; CHECK: add [[REG7:x[0-9]+]], [[REG6]], [[REG3]] ; CHECK: and [[REG8:x[0-9]+]], [[REG7]], #0xffff ; CHECK: str [[REG8]], {{\[}}[[REG1]]{{\]}} Index: llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll @@ -8,8 +8,8 @@ ; ARM64: adrp x8, _message@PAGE ; ARM64: add x0, x8, _message@PAGEOFF ; ARM64: mov w9, wzr -; ARM64: mov x2, #80 ; ARM64: uxtb w1, w9 +; ARM64: mov x2, #80 ; ARM64: bl _memset call void @llvm.memset.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i1 false) ret void Index: llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel.ll @@ -95,6 +95,8 @@ define void @ands(i32* %addr) { ; CHECK-LABEL: ands: ; CHECK: tst [[COND:w[0-9]+]], #0x1 +; CHECK-NEXT: orr w{{[0-9]+}}, wzr, #0x2 +; CHECK-NEXT: orr w{{[0-9]+}}, wzr, #0x1 ; CHECK-NEXT: csel [[COND]], entry: %cond91 = select i1 undef, i32 1, i32 2 Index: llvm/trunk/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll =================================================================== --- 
llvm/trunk/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll @@ -51,10 +51,10 @@ ; CHECK-NEXT: blr x16 ; FAST-LABEL: jscall_patchpoint_codegen2: ; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2 -; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 -; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 ; FAST-NEXT: str [[REG1]], [sp] +; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 ; FAST-NEXT: str [[REG2]], [sp, #16] +; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 ; FAST-NEXT: str [[REG3]], [sp, #24] ; FAST: Ltmp ; FAST-NEXT: mov x16, #281470681743360 @@ -87,14 +87,14 @@ ; CHECK-NEXT: blr x16 ; FAST-LABEL: jscall_patchpoint_codegen3: ; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2 -; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 -; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 -; FAST-NEXT: orr [[REG4:w[0-9]+]], wzr, #0x8 -; FAST-NEXT: mov [[REG5:x[0-9]+]], #10 ; FAST-NEXT: str [[REG1]], [sp] +; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 ; FAST-NEXT: str [[REG2]], [sp, #16] +; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 ; FAST-NEXT: str [[REG3]], [sp, #24] +; FAST-NEXT: orr [[REG4:w[0-9]+]], wzr, #0x8 ; FAST-NEXT: str [[REG4]], [sp, #36] +; FAST-NEXT: mov [[REG5:x[0-9]+]], #10 ; FAST-NEXT: str [[REG5]], [sp, #48] ; FAST: Ltmp ; FAST-NEXT: mov x16, #281470681743360 Index: llvm/trunk/test/CodeGen/AArch64/swifterror.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/swifterror.ll +++ llvm/trunk/test/CodeGen/AArch64/swifterror.ll @@ -189,10 +189,10 @@ ; CHECK-O0:[[BB2]]: ; CHECK-O0: ldr x0, [sp, [[SLOT2]]] ; CHECK-O0: fcmp -; CHECK-O0: str x0, [sp] +; CHECK-O0: str x0, [sp, [[SLOT3:#[0-9]+]] ; CHECK-O0: b.le [[BB1]] ; reload from stack -; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp] +; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT3]]] ; CHECK-O0: mov x21, [[ID3]] ; CHECK-O0: ret entry: Index: llvm/trunk/test/CodeGen/ARM/fast-isel-call.ll 
=================================================================== --- llvm/trunk/test/CodeGen/ARM/fast-isel-call.ll +++ llvm/trunk/test/CodeGen/ARM/fast-isel-call.ll @@ -95,51 +95,54 @@ define i32 @t10() { entry: ; ARM: @t10 -; ARM: movw [[R0:l?r[0-9]*]], #0 -; ARM: movw [[R1:l?r[0-9]*]], #248 -; ARM: movw [[R2:l?r[0-9]*]], #187 -; ARM: movw [[R3:l?r[0-9]*]], #28 -; ARM: movw [[R4:l?r[0-9]*]], #40 -; ARM: movw [[R5:l?r[0-9]*]], #186 -; ARM: and [[R0]], [[R0]], #255 -; ARM: and [[R1]], [[R1]], #255 -; ARM: and [[R2]], [[R2]], #255 -; ARM: and [[R3]], [[R3]], #255 -; ARM: and [[R4]], [[R4]], #255 -; ARM: str [[R4]], [sp] -; ARM: and [[R4]], [[R5]], #255 -; ARM: str [[R4]], [sp, #4] +; ARM-DAG: movw [[R0:l?r[0-9]*]], #0 +; ARM-DAG: movw [[R1:l?r[0-9]*]], #248 +; ARM-DAG: movw [[R2:l?r[0-9]*]], #187 +; ARM-DAG: movw [[R3:l?r[0-9]*]], #28 +; ARM-DAG: movw [[R4:l?r[0-9]*]], #40 +; ARM-DAG: movw [[R5:l?r[0-9]*]], #186 +; ARM-DAG: and [[R0]], [[R0]], #255 +; ARM-DAG: and [[R1]], [[R1]], #255 +; ARM-DAG: and [[R2]], [[R2]], #255 +; ARM-DAG: and [[R3]], [[R3]], #255 +; ARM-DAG: and [[R4]], [[R4]], #255 +; ARM-DAG: str [[R4]], [sp] +; ARM-DAG: and [[R4]], [[R5]], #255 +; ARM-DAG: str [[R4]], [sp, #4] ; ARM: bl {{_?}}bar -; ARM-LONG: @t10 +; ARM-LONG-LABEL: @t10 ; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} ; ARM-LONG-MACHO: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} -; ARM-LONG-MACHO: ldr [[R]], {{\[}}[[R]]{{\]}} +; ARM-LONG-MACHO: str [[R]], [r7, [[SLOT:#[-0-9]+]]] @ 4-byte Spill +; ARM-LONG-MACHO: ldr [[R:l?r[0-9]*]], [r7, [[SLOT]]] @ 4-byte Reload ; ARM-LONG-ELF: movw [[R:l?r[0-9]*]], :lower16:bar ; ARM-LONG-ELF: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} ; ARM-LONG: blx [[R]] ; THUMB: @t10 -; THUMB: movs [[R0:l?r[0-9]*]], #0 -; THUMB: movs [[R1:l?r[0-9]*]], #248 -; THUMB: movs [[R2:l?r[0-9]*]], #187 -; THUMB: movs [[R3:l?r[0-9]*]], #28 -; THUMB: movw [[R4:l?r[0-9]*]], #40 -; THUMB: movw [[R5:l?r[0-9]*]], #186 -; 
THUMB: and [[R0]], [[R0]], #255 -; THUMB: and [[R1]], [[R1]], #255 -; THUMB: and [[R2]], [[R2]], #255 -; THUMB: and [[R3]], [[R3]], #255 -; THUMB: and [[R4]], [[R4]], #255 -; THUMB: str.w [[R4]], [sp] -; THUMB: and [[R4]], [[R5]], #255 -; THUMB: str.w [[R4]], [sp, #4] +; THUMB-DAG: movs [[R0:l?r[0-9]*]], #0 +; THUMB-DAG: movs [[R1:l?r[0-9]*]], #248 +; THUMB-DAG: movs [[R2:l?r[0-9]*]], #187 +; THUMB-DAG: movs [[R3:l?r[0-9]*]], #28 +; THUMB-DAG: movw [[R4:l?r[0-9]*]], #40 +; THUMB-DAG: movw [[R5:l?r[0-9]*]], #186 +; THUMB-DAG: and [[R0]], [[R0]], #255 +; THUMB-DAG: and [[R1]], [[R1]], #255 +; THUMB-DAG: and [[R2]], [[R2]], #255 +; THUMB-DAG: and [[R3]], [[R3]], #255 +; THUMB-DAG: and [[R4]], [[R4]], #255 +; THUMB-DAG: str.w [[R4]], [sp] +; THUMB-DAG: and [[R4]], [[R5]], #255 +; THUMB-DAG: str.w [[R4]], [sp, #4] ; THUMB: bl {{_?}}bar -; THUMB-LONG: @t10 +; THUMB-LONG-LABEL: @t10 ; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} ; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} ; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}} +; THUMB-LONG: str [[R]], [sp, [[SLOT:#[-0-9]+]]] @ 4-byte Spill +; THUMB-LONG: ldr.w [[R:l?r[0-9]*]], [sp, [[SLOT]]] @ 4-byte Reload ; THUMB-LONG: blx [[R]] %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70) ret i32 0 @@ -152,14 +155,15 @@ } define void @foo3() uwtable { -; ARM: movw r0, #0 -; ARM: {{(movw r1, :lower16:_?bar0)|(ldr r1, .LCPI)}} -; ARM: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}} -; ARM: blx r1 -; THUMB: movs r0, #0 -; THUMB: {{(movw r1, :lower16:_?bar0)|(ldr.n r1, .LCPI)}} -; THUMB: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}} -; THUMB: blx r1 +; ARM: @foo3 +; ARM: {{(movw r[0-9]+, :lower16:_?bar0)|(ldr r[0-9]+, .LCPI)}} +; ARM: {{(movt r[0-9]+, :upper16:_?bar0)|(ldr r[0-9]+, \[r[0-9]+\])}} +; ARM: movw {{r[0-9]+}}, #0 +; ARM: blx {{r[0-9]+}} +; THUMB: {{(movw r[0-9]+, :lower16:_?bar0)|(ldr.n r[0-9]+, 
.LCPI)}} +; THUMB: {{(movt r[0-9]+, :upper16:_?bar0)|(ldr r[0-9]+, \[r[0-9]+\])}} +; THUMB: movs {{r[0-9]+}}, #0 +; THUMB: blx {{r[0-9]+}} %fptr = alloca i32 (i32)*, align 8 store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8 %1 = load i32 (i32)*, i32 (i32)** %fptr, align 8 @@ -171,7 +175,7 @@ entry: ; ARM: LibCall ; ARM: bl {{___udivsi3|__aeabi_uidiv}} -; ARM-LONG: LibCall +; ARM-LONG-LABEL: LibCall ; ARM-LONG-MACHO: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr r2, .LCPI)}} ; ARM-LONG-MACHO: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}} @@ -183,7 +187,7 @@ ; ARM-LONG: blx r2 ; THUMB: LibCall ; THUMB: bl {{___udivsi3|__aeabi_uidiv}} -; THUMB-LONG: LibCall +; THUMB-LONG-LABEL: LibCall ; THUMB-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr.n r2, .LCPI)}} ; THUMB-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}} ; THUMB-LONG: ldr r2, [r2] Index: llvm/trunk/test/CodeGen/ARM/fast-isel-intrinsic.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ llvm/trunk/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -16,10 +16,10 @@ ; ARM-LABEL: t1: ; ARM: {{(movw r0, :lower16:_?message1)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} -; ARM: add r0, r0, #5 -; ARM: movw r1, #64 -; ARM: movw r2, #10 -; ARM: and r1, r1, #255 +; ARM-DAG: add r0, r0, #5 +; ARM-DAG: movw r1, #64 +; ARM-DAG: movw r2, #10 +; ARM-DAG: and r1, r1, #255 ; ARM: bl {{_?}}memset ; ARM-LONG-LABEL: t1: @@ -36,8 +36,8 @@ ; THUMB: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} ; THUMB: adds r0, #5 ; THUMB: movs r1, #64 -; THUMB: movs r2, #10 ; THUMB: and r1, r1, #255 +; THUMB: movs r2, #10 ; THUMB: bl {{_?}}memset ; THUMB-LONG-LABEL: t1: ; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr @@ -62,10 +62,10 @@ ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 -; ARM: movw r2, #17 ; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill ; ARM: mov r0, r1 ; ARM: ldr r1, [sp[[SLOT]]] @ 
4-byte Reload +; ARM: movw r2, #17 ; ARM: bl {{_?}}memcpy ; ARM-LONG-LABEL: t2: @@ -83,10 +83,10 @@ ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 -; THUMB: movs r2, #17 ; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill ; THUMB: mov r0, r1 ; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: movs r2, #17 ; THUMB: bl {{_?}}memcpy ; THUMB-LONG-LABEL: t2: ; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr @@ -112,8 +112,8 @@ ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 -; ARM: movw r2, #10 ; ARM: mov r0, r1 +; ARM: movw r2, #10 ; ARM: bl {{_?}}memmove ; ARM-LONG-LABEL: t3: @@ -131,10 +131,10 @@ ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 -; THUMB: movs r2, #10 ; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill ; THUMB: mov r0, r1 ; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: movs r2, #10 ; THUMB: bl {{_?}}memmove ; THUMB-LONG-LABEL: t3: ; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr Index: llvm/trunk/test/CodeGen/ARM/fast-isel-select.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/fast-isel-select.ll +++ llvm/trunk/test/CodeGen/ARM/fast-isel-select.ll @@ -6,16 +6,14 @@ define i32 @t1(i1 %c) nounwind readnone { entry: ; ARM: t1 -; ARM: movw r{{[1-9]}}, #10 ; ARM: tst r0, #1 -; ARM: moveq r{{[1-9]}}, #20 -; ARM: mov r0, r{{[1-9]}} +; ARM: movw r0, #10 +; ARM: moveq r0, #20 ; THUMB: t1 -; THUMB: movs r{{[1-9]}}, #10 ; THUMB: tst.w r0, #1 +; THUMB: movw r0, #10 ; THUMB: it eq -; THUMB: moveq r{{[1-9]}}, #20 -; THUMB: mov r0, r{{[1-9]}} +; THUMB: moveq r0, #20 %0 = select i1 %c, i32 10, i32 20 ret i32 %0 } @@ -26,7 +24,7 @@ ; ARM: tst r0, #1 ; ARM: moveq r{{[1-9]}}, #20 ; ARM: mov r0, r{{[1-9]}} -; THUMB: t2 +; THUMB-LABEL: t2 ; THUMB: tst.w r0, #1 ; THUMB: it eq ; THUMB: moveq r{{[1-9]}}, #20 @@ -54,16 +52,14 @@ define i32 @t4(i1 %c) nounwind readnone { entry: ; ARM: t4 -; ARM: mvn r{{[1-9]}}, #9 ; ARM: tst r0, #1 -; ARM: mvneq r{{[1-9]}}, 
#0 -; ARM: mov r0, r{{[1-9]}} +; ARM: mvn r0, #9 +; ARM: mvneq r0, #0 ; THUMB-LABEL: t4 -; THUMB: mvn [[REG:r[1-9]+]], #9 ; THUMB: tst.w r0, #1 +; THUMB: mvn r0, #9 ; THUMB: it eq -; THUMB: mvneq [[REG]], #0 -; THUMB: mov r0, [[REG]] +; THUMB: mvneq r0, #0 %0 = select i1 %c, i32 -10, i32 -1 ret i32 %0 } Index: llvm/trunk/test/CodeGen/ARM/fast-isel-vararg.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/fast-isel-vararg.ll +++ llvm/trunk/test/CodeGen/ARM/fast-isel-vararg.ll @@ -17,23 +17,24 @@ %4 = load i32, i32* %n, align 4 ; ARM: VarArg ; ARM: mov [[FP:r[0-9]+]], sp -; ARM: sub sp, sp, #32 -; ARM: movw r0, #5 +; ARM: sub sp, sp, #{{(36|40)}} ; ARM: ldr r1, {{\[}}[[FP]], #-4] ; ARM: ldr r2, {{\[}}[[FP]], #-8] ; ARM: ldr r3, {{\[}}[[FP]], #-12] -; ARM: ldr [[Ra:r[0-9]+]], [sp, #16] -; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #12] -; ARM: str [[Ra]], [sp] +; ARM: ldr [[Ra:r[0-9]+]], {{\[}}[[FP]], #-16] +; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #{{(16|20)}}] +; ARM: movw [[Rc:[lr]+[0-9]*]], #5 +; Ra got spilled +; ARM: mov r0, [[Rc]] +; ARM: str {{.*}}, [sp] ; ARM: str [[Rb]], [sp, #4] ; ARM: bl {{_?CallVariadic}} -; THUMB: sub sp, #32 -; THUMB: movs r0, #5 -; THUMB: ldr r1, [sp, #28] -; THUMB: ldr r2, [sp, #24] -; THUMB: ldr r3, [sp, #20] +; THUMB: sub sp, #{{36}} +; THUMB: ldr r1, [sp, #32] +; THUMB: ldr r2, [sp, #28] +; THUMB: ldr r3, [sp, #24] +; THUMB: ldr {{[a-z0-9]+}}, [sp, #20] ; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #16] -; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #12] ; THUMB: str.w {{[a-z0-9]+}}, [sp] ; THUMB: str.w {{[a-z0-9]+}}, [sp, #4] ; THUMB: bl {{_?}}CallVariadic Index: llvm/trunk/test/CodeGen/ARM/swifterror.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/swifterror.ll +++ llvm/trunk/test/CodeGen/ARM/swifterror.ll @@ -188,11 +188,10 @@ ; CHECK-O0: mov r{{.*}}, r8 ; CHECK-O0: cmp r{{.*}}, #0 ; CHECK-O0: beq -; CHECK-O0-DAG: movw r{{.*}}, #1 -; CHECK-O0-DAG: mov 
r{{.*}}, #16 +; CHECK-O0: mov r0, #16 ; CHECK-O0: malloc ; CHECK-O0-DAG: mov [[ID:r[0-9]+]], r0 -; CHECK-O0-DAG: ldr [[ID2:r[0-9]+]], [sp{{.*}}] +; CHECK-O0-DAG: movw [[ID2:.*]], #1 ; CHECK-O0: strb [[ID2]], [{{.*}}[[ID]], #8] ; spill r0 ; CHECK-O0: str r0, [sp{{.*}}] Index: llvm/trunk/test/CodeGen/Mips/Fast-ISel/callabi.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/Fast-ISel/callabi.ll +++ llvm/trunk/test/CodeGen/Mips/Fast-ISel/callabi.ll @@ -163,32 +163,26 @@ define void @cxcccc() { ; ALL-LABEL: cxcccc: - ; ALL-DAG: addiu $[[T0:[0-9]+]], $zero, 88 - ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 44 - ; ALL-DAG: addiu $[[T2:[0-9]+]], $zero, 11 - ; ALL-DAG: addiu $[[T3:[0-9]+]], $zero, 33 + ; ALL: addiu $[[R:[0-9]+]], $zero, 88 + ; 32R1: sll $[[R:[0-9]+]], $[[R]], 24 + ; 32R1: sra $4, $[[R]], 24 + ; 32R2: seb $4, $[[R]] + ; ALL: addiu $[[R:[0-9]+]], $zero, 44 + ; 32R1: sll $[[R:[0-9]+]], $[[R]], 24 + ; 32R1: sra $5, $[[R]], 24 + ; 32R2: seb $5, $[[R]] + ; ALL: addiu $[[R:[0-9]+]], $zero, 11 + ; 32R1: sll $[[R:[0-9]+]], $[[R]], 24 + ; 32R1: sra $6, $[[R]], 24 + ; 32R2: seb $6, $[[R]] + ; ALL: addiu $[[R:[0-9]+]], $zero, 33 + ; 32R1: sll $[[R:[0-9]+]], $[[R]], 24 + ; 32R1: sra $7, $[[R]], 24 + ; 32R2: seb $7, $[[R]] - ; FIXME: We should avoid the unnecessary spill/reload here. 
- - ; 32R1-DAG: sll $[[T4:[0-9]+]], $[[T0]], 24 - ; 32R1-DAG: sra $[[T5:[0-9]+]], $[[T4]], 24 - ; 32R1-DAG: sw $4, 16($sp) - ; 32R1-DAG: move $4, $[[T5]] - ; 32R1-DAG: sll $[[T6:[0-9]+]], $[[T1]], 24 - ; 32R1-DAG: sra $5, $[[T6]], 24 - ; 32R1-DAG: sll $[[T7:[0-9]+]], $[[T2]], 24 - ; 32R1-DAG: sra $6, $[[T7]], 24 - ; 32R1: lw $[[T8:[0-9]+]], 16($sp) - ; 32R1: sll $[[T9:[0-9]+]], $[[T8]], 24 - ; 32R1: sra $7, $[[T9]], 24 - - ; 32R2-DAG: seb $[[T4:[0-9]+]], $[[T0]] - ; 32R2-DAG: sw $4, 16($sp) - ; 32R2-DAG: move $4, $[[T4]] - ; 32R2-DAG: seb $5, $[[T1]] - ; 32R2-DAG: seb $6, $[[T2]] - ; 32R2-DAG: lw $[[T5:[0-9]+]], 16($sp) - ; 32R2: seb $7, $[[T5]] + ; ALL: lw $25, %got(xcccc)($2) + ; ALL: jalr $25 + ; ALL: jr $ra call void @xcccc(i8 88, i8 44, i8 11, i8 33) ret void } @@ -198,32 +192,27 @@ define void @cxhhhh() { ; ALL-LABEL: cxhhhh: - ; ALL-DAG: addiu $[[T0:[0-9]+]], $zero, 88 - ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 44 - ; ALL-DAG: addiu $[[T2:[0-9]+]], $zero, 11 - ; ALL-DAG: addiu $[[T3:[0-9]+]], $zero, 33 + ; ALL: addiu $[[R:[0-9]+]], $zero, 88 + ; 32R1: sll $[[R]], $[[R]], 16 + ; 32R1: sra $4, $[[R]], 16 + ; 32R2: seh $4, $[[R]] + ; ALL: addiu $[[R:[0-9]+]], $zero, 44 + ; 32R1: sll $[[R]], $[[R]], 16 + ; 32R1: sra $5, $[[R]], 16 + ; 32R2: seh $5, $[[R]] + ; ALL: addiu $[[R:[0-9]+]], $zero, 11 + ; 32R1: sll $[[R]], $[[R]], 16 + ; 32R1: sra $6, $[[R]], 16 + ; 32R2: seh $6, $[[R]] + ; ALL: addiu $[[R:[0-9]+]], $zero, 33 + ; 32R1: sll $[[R]], $[[R]], 16 + ; 32R1: sra $7, $[[R]], 16 + ; 32R2: seh $7, $[[R]] - ; FIXME: We should avoid the unnecessary spill/reload here. 
+ ; ALL: lw $25, %got(xhhhh)($2) + ; ALL: jalr $25 + ; ALL: jr $ra - ; 32R1-DAG: sll $[[T4:[0-9]+]], $[[T0]], 16 - ; 32R1-DAG: sra $[[T5:[0-9]+]], $[[T4]], 16 - ; 32R1-DAG: sw $4, 16($sp) - ; 32R1-DAG: move $4, $[[T5]] - ; 32R1-DAG: sll $[[T6:[0-9]+]], $[[T1]], 16 - ; 32R1-DAG: sra $5, $[[T6]], 16 - ; 32R1-DAG: sll $[[T7:[0-9]+]], $[[T2]], 16 - ; 32R1-DAG: sra $6, $[[T7]], 16 - ; 32R1: lw $[[T8:[0-9]+]], 16($sp) - ; 32R1: sll $[[T9:[0-9]+]], $[[T8]], 16 - ; 32R1: sra $7, $[[T9]], 16 - - ; 32R2-DAG: seh $[[T4:[0-9]+]], $[[T0]] - ; 32R2-DAG: sw $4, 16($sp) - ; 32R2-DAG: move $4, $[[T4]] - ; 32R2-DAG: seh $5, $[[T1]] - ; 32R2-DAG: seh $6, $[[T2]] - ; 32R2-DAG: lw $[[T5:[0-9]+]], 16($sp) - ; 32R2: seh $7, $[[T5]] call void @xhhhh(i16 88, i16 44, i16 11, i16 33) ret void } @@ -437,13 +426,13 @@ ; ALL-LABEL: cxiff: ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}} - ; ALL-DAG: addiu $4, $zero, 12239 ; ALL-DAG: lui $[[REGF0_1:[0-9]+]], 17526 ; ALL-DAG: ori $[[REGF0_2:[0-9]+]], $[[REGF0_1]], 55706 ; ALL-DAG: mtc1 $[[REGF0_2]], $f[[REGF0_3:[0-9]+]] ; ALL-DAG: lui $[[REGF1_1:[0-9]+]], 16543 ; ALL-DAG: ori $[[REGF1_2:[0-9]+]], $[[REGF1_1]], 65326 ; ALL: mtc1 $[[REGF1_2]], $f[[REGF1_3:[0-9]+]] + ; ALL-DAG: addiu $4, $zero, 12239 ; ALL-DAG: mfc1 $5, $f[[REGF0_3]] ; ALL-DAG: mfc1 $6, $f[[REGF1_3]] ; ALL-DAG: lw $25, %got(xiff)($[[REG_GP]]) @@ -481,11 +470,11 @@ ; ALL-DAG: lui $[[REGF0_1:[0-9]+]], 17527 ; ALL-DAG: ori $[[REGF0_2:[0-9]+]], $[[REGF0_1]], 2015 ; ALL-DAG: mtc1 $[[REGF0_2]], $f[[REGF0_3:[0-9]+]] - ; ALL-DAG: addiu $6, $zero, 9991 ; ALL-DAG: lui $[[REGF1_1:[0-9]+]], 17802 ; ALL-DAG: ori $[[REGF1_2:[0-9]+]], $[[REGF1_1]], 58470 ; ALL: mtc1 $[[REGF1_2]], $f[[REGF1_3:[0-9]+]] ; ALL-DAG: mfc1 $5, $f[[REGF0_3]] + ; ALL-DAG: addiu $6, $zero, 9991 ; ALL-DAG: mfc1 $7, $f[[REGF1_3]] ; ALL-DAG: lw $25, %got(xifif)($[[REG_GP]]) ; ALL: jalr $25 @@ -500,16 +489,16 @@ ; ALL-LABEL: cxiffi: ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}} - ; ALL-DAG: addiu $4, $zero, 
45 ; ALL-DAG: lui $[[REGF0_1:[0-9]+]], 16307 ; ALL-DAG: ori $[[REGF0_2:[0-9]+]], $[[REGF0_1]], 13107 ; ALL-DAG: mtc1 $[[REGF0_2]], $f[[REGF0_3:[0-9]+]] ; ALL-DAG: lui $[[REGF1_1:[0-9]+]], 17529 ; ALL-DAG: ori $[[REGF1_2:[0-9]+]], $[[REGF1_1]], 39322 ; ALL: mtc1 $[[REGF1_2]], $f[[REGF1_3:[0-9]+]] - ; ALL-DAG: addiu $7, $zero, 234 + ; ALL-DAG: addiu $4, $zero, 45 ; ALL-DAG: mfc1 $5, $f[[REGF0_3]] ; ALL-DAG: mfc1 $6, $f[[REGF1_3]] + ; ALL-DAG: addiu $7, $zero, 234 ; ALL-DAG: lw $25, %got(xiffi)($[[REG_GP]]) ; ALL: jalr $25 call void @xiffi(i32 45, float 0x3FF6666660000000, Index: llvm/trunk/test/CodeGen/Mips/Fast-ISel/simplestore.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/Fast-ISel/simplestore.ll +++ llvm/trunk/test/CodeGen/Mips/Fast-ISel/simplestore.ll @@ -9,8 +9,8 @@ define void @foo() { entry: store i32 12345, i32* @abcd, align 4 -; CHECK: addiu $[[REG1:[0-9]+]], $zero, 12345 ; CHECK: lw $[[REG2:[0-9]+]], %got(abcd)(${{[0-9]+}}) +; CHECK: addiu $[[REG1:[0-9]+]], $zero, 12345 ; CHECK: sw $[[REG1]], 0($[[REG2]]) ret void } Index: llvm/trunk/test/CodeGen/Mips/Fast-ISel/simplestorei.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/Fast-ISel/simplestorei.ll +++ llvm/trunk/test/CodeGen/Mips/Fast-ISel/simplestorei.ll @@ -10,8 +10,8 @@ entry: store i32 32767, i32* @ijk, align 4 ; CHECK: .ent si2_1 -; CHECK: addiu $[[REG1:[0-9]+]], $zero, 32767 ; CHECK: lw $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}}) +; CHECK: addiu $[[REG1:[0-9]+]], $zero, 32767 ; CHECK: sw $[[REG1]], 0($[[REG2]]) ret void @@ -34,8 +34,8 @@ entry: store i32 65535, i32* @ijk, align 4 ; CHECK: .ent ui2_1 -; CHECK: ori $[[REG1:[0-9]+]], $zero, 65535 ; CHECK: lw $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}}) +; CHECK: ori $[[REG1:[0-9]+]], $zero, 65535 ; CHECK: sw $[[REG1]], 0($[[REG2]]) ret void } @@ -45,8 +45,8 @@ entry: store i32 983040, i32* @ijk, align 4 ; CHECK: .ent ui4_1 -; CHECK: lui 
$[[REG1:[0-9]+]], 15 ; CHECK: lw $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}}) +; CHECK: lui $[[REG1:[0-9]+]], 15 ; CHECK: sw $[[REG1]], 0($[[REG2]]) ret void } Index: llvm/trunk/test/CodeGen/X86/avx512-mask-zext-bugfix.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-mask-zext-bugfix.ll +++ llvm/trunk/test/CodeGen/X86/avx512-mask-zext-bugfix.ll @@ -17,25 +17,21 @@ define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %fname){ ; CHECK-LABEL: test_xmm: ; CHECK: ## %bb.0: -; CHECK-NEXT: subq $72, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: movl $4, %eax +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: vpmovw2m %xmm0, %k0 ; CHECK-NEXT: movl $2, %esi -; CHECK-NEXT: movl $8, %edi -; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) ## 4-byte Spill +; CHECK-NEXT: movl $8, %eax ; CHECK-NEXT: movq %rdx, %rdi -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r8d ## 4-byte Reload ; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK-NEXT: movl %r8d, %edx +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill ; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill ; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill -; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val ; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: movw %dx, %r9w -; CHECK-NEXT: movzwl %r9w, %esi +; CHECK-NEXT: movw %dx, %r8w +; CHECK-NEXT: movzwl %r8w, %esi ; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload ; CHECK-NEXT: kmovb %k0, %edi ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload @@ -45,25 +41,26 @@ ; CHECK-NEXT: vpmovd2m %xmm0, %k0 ; CHECK-NEXT: kmovq %k0, %k1 ; CHECK-NEXT: kmovd %k0, %esi -; CHECK-NEXT: movb %sil, %r10b -; CHECK-NEXT: movzbl %r10b, %esi -; CHECK-NEXT: movw %si, %r9w +; CHECK-NEXT: movb %sil, %r9b +; 
CHECK-NEXT: movzbl %r9b, %esi +; CHECK-NEXT: movw %si, %r8w ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi ## 8-byte Reload -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %esi ## 4-byte Reload +; CHECK-NEXT: movl $4, %esi +; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) ## 4-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx ## 4-byte Reload ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill ; CHECK-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill -; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK-NEXT: movw %r8w, {{[0-9]+}}(%rsp) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val -; CHECK-NEXT: movw %ax, %r9w -; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r11w ## 2-byte Reload -; CHECK-NEXT: movzwl %r11w, %edi -; CHECK-NEXT: movzwl %r9w, %esi +; CHECK-NEXT: movw %ax, %r8w +; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload +; CHECK-NEXT: movzwl %r10w, %edi +; CHECK-NEXT: movzwl %r8w, %esi ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload ; CHECK-NEXT: callq _check_mask16 -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill -; CHECK-NEXT: addq $72, %rsp +; CHECK-NEXT: movl %eax, (%rsp) ## 4-byte Spill +; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: retq %d2 = bitcast <2 x i64> %a to <8 x i16> %m2 = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %d2) Index: llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll @@ -24,11 +24,11 @@ ; ; X64-LABEL: test__tzcnt_u16: ; X64: # %bb.0: -; X64-NEXT: movw $16, %cx -; X64-NEXT: movzwl %di, %edx -; X64-NEXT: tzcntw %dx, %ax -; X64-NEXT: cmpl $0, %edx -; X64-NEXT: cmovew %cx, %ax +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: tzcntw %ax, %cx +; X64-NEXT: cmpl $0, %eax +; X64-NEXT: movw $16, %ax +; X64-NEXT: cmovnew %cx, %ax ; X64-NEXT: retq %zext = zext i16 
%a0 to i32 %cmp = icmp ne i32 %zext, 0 @@ -146,9 +146,9 @@ ; ; X64-LABEL: test__tzcnt_u32: ; X64: # %bb.0: -; X64-NEXT: movl $32, %ecx -; X64-NEXT: tzcntl %edi, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: tzcntl %edi, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: cmovael %ecx, %eax ; X64-NEXT: retq %cmp = icmp ne i32 %a0, 0 %cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 true) @@ -176,11 +176,11 @@ ; ; X64-LABEL: test_tzcnt_u16: ; X64: # %bb.0: -; X64-NEXT: movw $16, %cx -; X64-NEXT: movzwl %di, %edx -; X64-NEXT: tzcntw %dx, %ax -; X64-NEXT: cmpl $0, %edx -; X64-NEXT: cmovew %cx, %ax +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: tzcntw %ax, %cx +; X64-NEXT: cmpl $0, %eax +; X64-NEXT: movw $16, %ax +; X64-NEXT: cmovnew %cx, %ax ; X64-NEXT: retq %zext = zext i16 %a0 to i32 %cmp = icmp ne i32 %zext, 0 @@ -311,9 +311,9 @@ ; ; X64-LABEL: test_tzcnt_u32: ; X64: # %bb.0: -; X64-NEXT: movl $32, %ecx -; X64-NEXT: tzcntl %edi, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: tzcntl %edi, %ecx +; X64-NEXT: movl $32, %eax +; X64-NEXT: cmovael %ecx, %eax ; X64-NEXT: retq %cmp = icmp ne i32 %a0, 0 %cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 true) Index: llvm/trunk/test/CodeGen/X86/fast-isel-call-cleanup.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-call-cleanup.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-call-cleanup.ll @@ -6,10 +6,8 @@ %call = call i32 @targetfn(i32 42) ret void ; CHECK-LABEL: fastiselcall: -; Local value area is still there: -; CHECK: movl $42, {{%[a-z]+}} -; Fast-ISel's arg mov is not here: -; CHECK-NOT: movl $42, (%esp) +; FastISel's local value code was dead, so it's gone. 
+; CHECK-NOT: movl $42, ; SDag-ISel's arg mov: ; CHECK: movabsq $_targetfn, %[[REG:[^ ]*]] ; CHECK: movl $42, %edi Index: llvm/trunk/test/CodeGen/X86/fast-isel-store.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-store.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-store.ll @@ -58,11 +58,11 @@ ; SSE64-NEXT: movdqu %xmm0, (%eax) ; SSE64-NEXT: retl ; -; AVXONLY32-LABEL: test_store_4xi32: -; AVXONLY32: # %bb.0: -; AVXONLY32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVXONLY32-NEXT: vmovdqu %xmm0, (%rdi) -; AVXONLY32-NEXT: retq +; AVX32-LABEL: test_store_4xi32: +; AVX32: # %bb.0: +; AVX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX32-NEXT: vmovdqu %xmm0, (%rdi) +; AVX32-NEXT: retq ; ; AVX64-LABEL: test_store_4xi32: ; AVX64: # %bb.0: @@ -70,18 +70,6 @@ ; AVX64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX64-NEXT: vmovdqu %xmm0, (%eax) ; AVX64-NEXT: retl -; -; KNL32-LABEL: test_store_4xi32: -; KNL32: # %bb.0: -; KNL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; KNL32-NEXT: vmovdqu %xmm0, (%rdi) -; KNL32-NEXT: retq -; -; SKX32-LABEL: test_store_4xi32: -; SKX32: # %bb.0: -; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX32-NEXT: vmovdqu %xmm0, (%rdi) -; SKX32-NEXT: retq %foo = add <4 x i32> %value, %value2 ; to force integer type on store store <4 x i32> %foo, <4 x i32>* %addr, align 1 ret <4 x i32> %foo @@ -101,11 +89,11 @@ ; SSE64-NEXT: movdqa %xmm0, (%eax) ; SSE64-NEXT: retl ; -; AVXONLY32-LABEL: test_store_4xi32_aligned: -; AVXONLY32: # %bb.0: -; AVXONLY32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVXONLY32-NEXT: vmovdqa %xmm0, (%rdi) -; AVXONLY32-NEXT: retq +; AVX32-LABEL: test_store_4xi32_aligned: +; AVX32: # %bb.0: +; AVX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX32-NEXT: vmovdqa %xmm0, (%rdi) +; AVX32-NEXT: retq ; ; AVX64-LABEL: test_store_4xi32_aligned: ; AVX64: # %bb.0: @@ -113,18 +101,6 @@ ; AVX64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX64-NEXT: vmovdqa %xmm0, (%eax) ; AVX64-NEXT: retl -; -; KNL32-LABEL: test_store_4xi32_aligned: -; 
KNL32: # %bb.0: -; KNL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; KNL32-NEXT: vmovdqa %xmm0, (%rdi) -; KNL32-NEXT: retq -; -; SKX32-LABEL: test_store_4xi32_aligned: -; SKX32: # %bb.0: -; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX32-NEXT: vmovdqa %xmm0, (%rdi) -; SKX32-NEXT: retq %foo = add <4 x i32> %value, %value2 ; to force integer type on store store <4 x i32> %foo, <4 x i32>* %addr, align 16 ret <4 x i32> %foo Index: llvm/trunk/test/CodeGen/X86/inreg.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/inreg.ll +++ llvm/trunk/test/CodeGen/X86/inreg.ll @@ -20,7 +20,7 @@ ; FAST-LABEL: g1: ; FAST: subl $[[AMT:.*]], %esp - ; FAST-NEXT: leal 8(%esp), %eax + ; FAST-NEXT: leal 16(%esp), %eax ; FAST-NEXT: movl $41, %edx ; FAST-NEXT: movl $42, %ecx ; FAST: $43, (%esp) Index: llvm/trunk/test/CodeGen/X86/pr32241.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr32241.ll +++ llvm/trunk/test/CodeGen/X86/pr32241.ll @@ -4,19 +4,16 @@ define i32 @_Z3foov() { ; CHECK-LABEL: _Z3foov: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: .cfi_offset %esi, -8 -; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: .cfi_def_cfa_offset 20 ; CHECK-NEXT: movw $10959, {{[0-9]+}}(%esp) # imm = 0x2ACF ; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376 ; CHECK-NEXT: movw $19417, {{[0-9]+}}(%esp) # imm = 0x4BD9 -; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: cmpw $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) # 1-byte Spill +; CHECK-NEXT: movb $1, %cl +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) # 1-byte Spill ; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: # %bb.1: # %lor.rhs ; CHECK-NEXT: xorl %eax, 
%eax @@ -25,17 +22,17 @@ ; CHECK-NEXT: jmp .LBB0_2 ; CHECK-NEXT: .LBB0_2: # %lor.end ; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al # 1-byte Reload -; CHECK-NEXT: movb $1, %cl ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %edx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; CHECK-NEXT: cmpl %edx, %esi +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload +; CHECK-NEXT: cmpl %ecx, %edx ; CHECK-NEXT: setl %al ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %edx -; CHECK-NEXT: xorl $-1, %edx -; CHECK-NEXT: cmpl $0, %edx -; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) # 1-byte Spill +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: xorl $-1, %ecx +; CHECK-NEXT: cmpl $0, %ecx +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) # 1-byte Spill ; CHECK-NEXT: jne .LBB0_4 ; CHECK-NEXT: # %bb.3: # %lor.rhs4 ; CHECK-NEXT: xorl %eax, %eax @@ -50,7 +47,6 @@ ; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl $16, %esp -; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl entry: %aa = alloca i16, align 2 Index: llvm/trunk/test/CodeGen/X86/pr32284.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr32284.ll +++ llvm/trunk/test/CodeGen/X86/pr32284.ll @@ -121,10 +121,10 @@ define void @f1() { ; X86-O0-LABEL: f1: ; X86-O0: # %bb.0: # %entry -; X86-O0-NEXT: movabsq $8381627093, %rax # imm = 0x1F3957AD5 -; X86-O0-NEXT: movslq var_5, %rcx -; X86-O0-NEXT: addq %rax, %rcx -; X86-O0-NEXT: cmpq $0, %rcx +; X86-O0-NEXT: movslq var_5, %rax +; X86-O0-NEXT: movabsq $8381627093, %rcx # imm = 0x1F3957AD5 +; X86-O0-NEXT: addq %rcx, %rax +; X86-O0-NEXT: cmpq $0, %rax ; X86-O0-NEXT: setne %dl ; X86-O0-NEXT: andb $1, %dl ; X86-O0-NEXT: movb %dl, -{{[0-9]+}}(%rsp) @@ -308,30 +308,30 @@ define void @f2() { ; X86-O0-LABEL: f2: ; X86-O0: # %bb.0: # %entry -; X86-O0-NEXT: # implicit-def: $rax -; X86-O0-NEXT: movzbl var_7, %ecx +; X86-O0-NEXT: 
movzbl var_7, %eax ; X86-O0-NEXT: cmpb $0, var_7 -; X86-O0-NEXT: setne %dl -; X86-O0-NEXT: xorb $-1, %dl -; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %esi -; X86-O0-NEXT: xorl %esi, %ecx -; X86-O0-NEXT: movw %cx, %di -; X86-O0-NEXT: movw %di, -{{[0-9]+}}(%rsp) -; X86-O0-NEXT: movzbl var_7, %ecx -; X86-O0-NEXT: movw %cx, %di -; X86-O0-NEXT: cmpw $0, %di -; X86-O0-NEXT: setne %dl -; X86-O0-NEXT: xorb $-1, %dl -; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %ecx -; X86-O0-NEXT: movzbl var_7, %esi -; X86-O0-NEXT: cmpl %esi, %ecx -; X86-O0-NEXT: sete %dl -; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %ecx -; X86-O0-NEXT: movw %cx, %di -; X86-O0-NEXT: movw %di, (%rax) +; X86-O0-NEXT: setne %cl +; X86-O0-NEXT: xorb $-1, %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %edx +; X86-O0-NEXT: xorl %edx, %eax +; X86-O0-NEXT: movw %ax, %si +; X86-O0-NEXT: movw %si, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: movzbl var_7, %eax +; X86-O0-NEXT: movw %ax, %si +; X86-O0-NEXT: cmpw $0, %si +; X86-O0-NEXT: setne %cl +; X86-O0-NEXT: xorb $-1, %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %eax +; X86-O0-NEXT: movzbl var_7, %edx +; X86-O0-NEXT: cmpl %edx, %eax +; X86-O0-NEXT: sete %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %eax +; X86-O0-NEXT: movw %ax, %si +; X86-O0-NEXT: # implicit-def: $rdi +; X86-O0-NEXT: movw %si, (%rdi) ; X86-O0-NEXT: retq ; ; X64-LABEL: f2: @@ -353,41 +353,37 @@ ; ; 686-O0-LABEL: f2: ; 686-O0: # %bb.0: # %entry -; 686-O0-NEXT: pushl %edi -; 686-O0-NEXT: .cfi_def_cfa_offset 8 ; 686-O0-NEXT: pushl %esi -; 686-O0-NEXT: .cfi_def_cfa_offset 12 +; 686-O0-NEXT: .cfi_def_cfa_offset 8 ; 686-O0-NEXT: subl $2, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 14 -; 686-O0-NEXT: .cfi_offset %esi, -12 -; 686-O0-NEXT: .cfi_offset %edi, -8 -; 686-O0-NEXT: # implicit-def: $eax -; 686-O0-NEXT: movzbl var_7, %ecx +; 686-O0-NEXT: .cfi_def_cfa_offset 10 +; 686-O0-NEXT: .cfi_offset %esi, -8 +; 686-O0-NEXT: movzbl var_7, %eax ; 
686-O0-NEXT: cmpb $0, var_7 -; 686-O0-NEXT: setne %dl -; 686-O0-NEXT: xorb $-1, %dl -; 686-O0-NEXT: andb $1, %dl -; 686-O0-NEXT: movzbl %dl, %esi -; 686-O0-NEXT: xorl %esi, %ecx -; 686-O0-NEXT: movw %cx, %di -; 686-O0-NEXT: movw %di, (%esp) -; 686-O0-NEXT: movzbl var_7, %ecx -; 686-O0-NEXT: movw %cx, %di -; 686-O0-NEXT: cmpw $0, %di -; 686-O0-NEXT: setne %dl -; 686-O0-NEXT: xorb $-1, %dl -; 686-O0-NEXT: andb $1, %dl -; 686-O0-NEXT: movzbl %dl, %ecx -; 686-O0-NEXT: movzbl var_7, %esi -; 686-O0-NEXT: cmpl %esi, %ecx -; 686-O0-NEXT: sete %dl -; 686-O0-NEXT: andb $1, %dl -; 686-O0-NEXT: movzbl %dl, %ecx -; 686-O0-NEXT: movw %cx, %di -; 686-O0-NEXT: movw %di, (%eax) +; 686-O0-NEXT: setne %cl +; 686-O0-NEXT: xorb $-1, %cl +; 686-O0-NEXT: andb $1, %cl +; 686-O0-NEXT: movzbl %cl, %edx +; 686-O0-NEXT: xorl %edx, %eax +; 686-O0-NEXT: movw %ax, %si +; 686-O0-NEXT: movw %si, (%esp) +; 686-O0-NEXT: movzbl var_7, %eax +; 686-O0-NEXT: movw %ax, %si +; 686-O0-NEXT: cmpw $0, %si +; 686-O0-NEXT: setne %cl +; 686-O0-NEXT: xorb $-1, %cl +; 686-O0-NEXT: andb $1, %cl +; 686-O0-NEXT: movzbl %cl, %eax +; 686-O0-NEXT: movzbl var_7, %edx +; 686-O0-NEXT: cmpl %edx, %eax +; 686-O0-NEXT: sete %cl +; 686-O0-NEXT: andb $1, %cl +; 686-O0-NEXT: movzbl %cl, %eax +; 686-O0-NEXT: movw %ax, %si +; 686-O0-NEXT: # implicit-def: $eax +; 686-O0-NEXT: movw %si, (%eax) ; 686-O0-NEXT: addl $2, %esp ; 686-O0-NEXT: popl %esi -; 686-O0-NEXT: popl %edi ; 686-O0-NEXT: retl ; ; 686-LABEL: f2: Index: llvm/trunk/test/CodeGen/X86/pr32340.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr32340.ll +++ llvm/trunk/test/CodeGen/X86/pr32340.ll @@ -15,31 +15,31 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movabsq $-1142377792914660288, %rdx # imm = 0xF02575732E06E440 ; X64-NEXT: movw $0, var_825 ; X64-NEXT: movzwl var_32, %eax -; X64-NEXT: movzwl var_901, %esi -; X64-NEXT: movl %eax, %edi -; X64-NEXT: xorl %esi, %edi 
+; X64-NEXT: movzwl var_901, %edx ; X64-NEXT: movl %eax, %esi -; X64-NEXT: xorl %edi, %esi -; X64-NEXT: addl %eax, %esi -; X64-NEXT: movslq %esi, %r8 -; X64-NEXT: movq %r8, var_826 +; X64-NEXT: xorl %edx, %esi +; X64-NEXT: movl %eax, %edx +; X64-NEXT: xorl %esi, %edx +; X64-NEXT: addl %eax, %edx +; X64-NEXT: movslq %edx, %rdi +; X64-NEXT: movq %rdi, var_826 ; X64-NEXT: movzwl var_32, %eax -; X64-NEXT: movl %eax, %r8d +; X64-NEXT: movl %eax, %edi ; X64-NEXT: movzwl var_901, %eax ; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D -; X64-NEXT: movslq %eax, %r9 -; X64-NEXT: xorq %rdx, %r9 -; X64-NEXT: movq %r8, %rdx -; X64-NEXT: xorq %r9, %rdx -; X64-NEXT: xorq $-1, %rdx -; X64-NEXT: xorq %rdx, %r8 -; X64-NEXT: movq %r8, %rdx -; X64-NEXT: orq var_57, %rdx -; X64-NEXT: orq %rdx, %r8 -; X64-NEXT: movw %r8w, %r10w +; X64-NEXT: movslq %eax, %r8 +; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440 +; X64-NEXT: xorq %r9, %r8 +; X64-NEXT: movq %rdi, %r9 +; X64-NEXT: xorq %r8, %r9 +; X64-NEXT: xorq $-1, %r9 +; X64-NEXT: xorq %r9, %rdi +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: orq var_57, %r8 +; X64-NEXT: orq %r8, %rdi +; X64-NEXT: movw %di, %r10w ; X64-NEXT: movw %r10w, var_900 ; X64-NEXT: cmpq var_28, %rcx ; X64-NEXT: setne %r11b Index: llvm/trunk/test/CodeGen/X86/pr32345.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr32345.ll +++ llvm/trunk/test/CodeGen/X86/pr32345.ll @@ -10,28 +10,28 @@ define void @foo() { ; X640-LABEL: foo: ; X640: # %bb.0: # %bb -; X640-NEXT: # implicit-def: $rax -; X640-NEXT: movzwl var_22, %ecx -; X640-NEXT: movzwl var_27, %edx -; X640-NEXT: xorl %edx, %ecx -; X640-NEXT: movzwl var_27, %edx -; X640-NEXT: xorl %edx, %ecx -; X640-NEXT: movslq %ecx, %rsi -; X640-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) -; X640-NEXT: movzwl var_22, %ecx -; X640-NEXT: movzwl var_27, %edx -; X640-NEXT: xorl %edx, %ecx -; X640-NEXT: movzwl var_27, %edx -; X640-NEXT: xorl %edx, %ecx -; X640-NEXT: movslq %ecx, 
%rsi +; X640-NEXT: movzwl var_22, %eax ; X640-NEXT: movzwl var_27, %ecx -; X640-NEXT: subl $16610, %ecx # imm = 0x40E2 -; X640-NEXT: movl %ecx, %ecx -; X640-NEXT: # kill: def $rcx killed $ecx +; X640-NEXT: xorl %ecx, %eax +; X640-NEXT: movzwl var_27, %ecx +; X640-NEXT: xorl %ecx, %eax +; X640-NEXT: movslq %eax, %rdx +; X640-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; X640-NEXT: movzwl var_22, %eax +; X640-NEXT: movzwl var_27, %ecx +; X640-NEXT: xorl %ecx, %eax +; X640-NEXT: movzwl var_27, %ecx +; X640-NEXT: xorl %ecx, %eax +; X640-NEXT: movslq %eax, %rdx +; X640-NEXT: movzwl var_27, %eax +; X640-NEXT: subl $16610, %eax # imm = 0x40E2 +; X640-NEXT: movl %eax, %eax +; X640-NEXT: movl %eax, %ecx ; X640-NEXT: # kill: def $cl killed $rcx -; X640-NEXT: sarq %cl, %rsi -; X640-NEXT: movb %sil, %cl -; X640-NEXT: movb %cl, (%rax) +; X640-NEXT: sarq %cl, %rdx +; X640-NEXT: movb %dl, %cl +; X640-NEXT: # implicit-def: $rdx +; X640-NEXT: movb %cl, (%rdx) ; X640-NEXT: retq ; ; 6860-LABEL: foo: @@ -49,36 +49,36 @@ ; 6860-NEXT: .cfi_offset %esi, -20 ; 6860-NEXT: .cfi_offset %edi, -16 ; 6860-NEXT: .cfi_offset %ebx, -12 -; 6860-NEXT: # implicit-def: $eax -; 6860-NEXT: movw var_22, %cx -; 6860-NEXT: movzwl var_27, %edx -; 6860-NEXT: movw %dx, %si -; 6860-NEXT: xorw %si, %cx -; 6860-NEXT: # implicit-def: $edi -; 6860-NEXT: movw %cx, %di -; 6860-NEXT: xorl %edx, %edi -; 6860-NEXT: movw %di, %cx -; 6860-NEXT: movzwl %cx, %edx -; 6860-NEXT: movl %edx, {{[0-9]+}}(%esp) +; 6860-NEXT: movw var_22, %ax +; 6860-NEXT: movzwl var_27, %ecx +; 6860-NEXT: movw %cx, %dx +; 6860-NEXT: xorw %dx, %ax +; 6860-NEXT: # implicit-def: $esi +; 6860-NEXT: movw %ax, %si +; 6860-NEXT: xorl %ecx, %esi +; 6860-NEXT: movw %si, %ax +; 6860-NEXT: movzwl %ax, %ecx +; 6860-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; 6860-NEXT: movl $0, {{[0-9]+}}(%esp) -; 6860-NEXT: movw var_22, %cx -; 6860-NEXT: movzwl var_27, %edx -; 6860-NEXT: movw %dx, %si -; 6860-NEXT: xorw %si, %cx -; 6860-NEXT: # implicit-def: $edi -; 6860-NEXT: movw %cx, 
%di -; 6860-NEXT: xorl %edx, %edi -; 6860-NEXT: movw %di, %cx -; 6860-NEXT: movzwl %cx, %edi -; 6860-NEXT: addl $-16610, %edx # imm = 0xBF1E -; 6860-NEXT: movb %dl, %bl -; 6860-NEXT: xorl %edx, %edx +; 6860-NEXT: movw var_22, %ax +; 6860-NEXT: movzwl var_27, %ecx +; 6860-NEXT: movw %cx, %dx +; 6860-NEXT: xorw %dx, %ax +; 6860-NEXT: # implicit-def: $esi +; 6860-NEXT: movw %ax, %si +; 6860-NEXT: xorl %ecx, %esi +; 6860-NEXT: movw %si, %ax +; 6860-NEXT: movzwl %ax, %esi +; 6860-NEXT: addl $-16610, %ecx # imm = 0xBF1E +; 6860-NEXT: movb %cl, %bl +; 6860-NEXT: xorl %ecx, %ecx +; 6860-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; 6860-NEXT: movb %bl, %cl -; 6860-NEXT: shrdl %cl, %edx, %edi +; 6860-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; 6860-NEXT: shrdl %cl, %edi, %esi ; 6860-NEXT: testb $32, %bl -; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; 6860-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; 6860-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; 6860-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill ; 6860-NEXT: jne .LBB0_2 ; 6860-NEXT: # %bb.1: # %bb ; 6860-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload @@ -86,7 +86,7 @@ ; 6860-NEXT: .LBB0_2: # %bb ; 6860-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; 6860-NEXT: movb %al, %cl -; 6860-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; 6860-NEXT: # implicit-def: $eax ; 6860-NEXT: movb %cl, (%eax) ; 6860-NEXT: leal -12(%ebp), %esp ; 6860-NEXT: popl %esi Index: llvm/trunk/test/CodeGen/X86/pr32484.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr32484.ll +++ llvm/trunk/test/CodeGen/X86/pr32484.ll @@ -7,9 +7,9 @@ ; CHECK-NEXT: # implicit-def: $rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: # implicit-def: $rax ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-NEXT: # implicit-def: $rax ; CHECK-NEXT: movdqu %xmm1, (%rax) ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # 16-byte 
Spill ; CHECK-NEXT: .LBB0_2: Index: llvm/trunk/test/CodeGen/X86/sink-local-value.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sink-local-value.ll +++ llvm/trunk/test/CodeGen/X86/sink-local-value.ll @@ -0,0 +1,210 @@ +; RUN: llc -O0 < %s | FileCheck %s + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-linux-gnu" + +; Try some simple cases that show how local value sinking improves line tables. + +@sink_across = external global i32 + +declare void @simple_callee(i32, i32) + +define void @simple() !dbg !5 { + store i32 44, i32* @sink_across, !dbg !7 + call void @simple_callee(i32 13, i32 55), !dbg !8 + ret void, !dbg !9 +} + +; CHECK-LABEL: simple: +; CHECK-NOT: movl $13, +; CHECK: .loc 1 1 1 prologue_end +; CHECK: movl $44, sink_across +; CHECK: .loc 1 2 1 +; CHECK: movl $13, +; CHECK: movl $55, +; CHECK: calll simple_callee + +declare void @simple_reg_callee(i32 inreg, i32 inreg) + +define void @simple_reg() !dbg !10 { + store i32 44, i32* @sink_across, !dbg !11 + call void @simple_reg_callee(i32 inreg 13, i32 inreg 55), !dbg !12 + ret void, !dbg !13 +} + +; CHECK-LABEL: simple_reg: +; CHECK: .loc 1 4 1 prologue_end +; CHECK: movl $44, sink_across +; CHECK: .loc 1 5 1 +; CHECK: movl $13, +; CHECK: movl $55, +; CHECK: calll simple_reg_callee + +; There are two interesting cases where local values have no uses but are not +; dead: when the local value is directly used by a phi, and when the local +; value is used by a no-op cast instruction. In these cases, we get side tables +; referring to the local value vreg that we need to check. 
+ +define i8* @phi_const(i32 %c) !dbg !14 { +entry: + %tobool = icmp eq i32 %c, 0, !dbg !20 + call void @llvm.dbg.value(metadata i1 %tobool, metadata !16, metadata !DIExpression()), !dbg !20 + br i1 %tobool, label %if.else, label %if.then, !dbg !21 + +if.then: ; preds = %entry + br label %if.end, !dbg !22 + +if.else: ; preds = %entry + br label %if.end, !dbg !23 + +if.end: ; preds = %if.else, %if.then + %r.0 = phi i8* [ inttoptr (i32 42 to i8*), %if.then ], [ inttoptr (i32 1 to i8*), %if.else ], !dbg !24 + call void @llvm.dbg.value(metadata i8* %r.0, metadata !18, metadata !DIExpression()), !dbg !24 + ret i8* %r.0, !dbg !25 +} + +; CHECK-LABEL: phi_const: +; CHECK: # %entry +; CHECK: cmpl $0, +; CHECK: # %if.then +; CHECK: movl $42, +; CHECK: jmp +; CHECK: # %if.else +; CHECK: movl $1, +; CHECK: # %if.end + +define i8* @phi_const_cast(i32 %c) !dbg !26 { +entry: + %tobool = icmp eq i32 %c, 0, !dbg !32 + call void @llvm.dbg.value(metadata i1 %tobool, metadata !28, metadata !DIExpression()), !dbg !32 + br i1 %tobool, label %if.else, label %if.then, !dbg !33 + +if.then: ; preds = %entry + %v42 = inttoptr i32 42 to i8*, !dbg !34 + call void @llvm.dbg.value(metadata i8* %v42, metadata !29, metadata !DIExpression()), !dbg !34 + br label %if.end, !dbg !35 + +if.else: ; preds = %entry + %v1 = inttoptr i32 1 to i8*, !dbg !36 + call void @llvm.dbg.value(metadata i8* %v1, metadata !30, metadata !DIExpression()), !dbg !36 + br label %if.end, !dbg !37 + +if.end: ; preds = %if.else, %if.then + %r.0 = phi i8* [ %v42, %if.then ], [ %v1, %if.else ], !dbg !38 + call void @llvm.dbg.value(metadata i8* %r.0, metadata !31, metadata !DIExpression()), !dbg !38 + ret i8* %r.0, !dbg !39 +} + +; CHECK-LABEL: phi_const_cast: +; CHECK: # %entry +; CHECK: cmpl $0, +; CHECK: # %if.then +; CHECK: movl $42, %[[REG:[a-z]+]] +; CHECK: #DEBUG_VALUE: phi_const_cast:4 <- $[[REG]] +; CHECK: jmp +; CHECK: # %if.else +; CHECK: movl $1, %[[REG:[a-z]+]] +; CHECK: #DEBUG_VALUE: phi_const_cast:5 <- $[[REG]] +; 
CHECK: # %if.end + +declare void @may_throw() local_unnamed_addr #1 + +declare i32 @__gxx_personality_v0(...) + +define i32 @invoke_phi() personality i32 (...)* @__gxx_personality_v0 { +entry: + store i32 42, i32* @sink_across + invoke void @may_throw() + to label %try.cont unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } + catch i8* null + store i32 42, i32* @sink_across + br label %try.cont + +try.cont: ; preds = %entry, %lpad + %r.0 = phi i32 [ 13, %entry ], [ 55, %lpad ] + ret i32 %r.0 +} + +; The constant materialization should be *after* the stores to sink_across, but +; before any EH_LABEL. + +; CHECK-LABEL: invoke_phi: +; CHECK: movl $42, sink_across +; CHECK: movl $13, %{{[a-z]*}} +; CHECK: .Ltmp{{.*}}: +; CHECK: calll may_throw +; CHECK: .Ltmp{{.*}}: +; CHECK: jmp .LBB{{.*}} +; CHECK: .LBB{{.*}}: # %lpad +; CHECK: movl $42, sink_across +; CHECK: movl $55, %{{[a-z]*}} +; CHECK: .LBB{{.*}}: # %try.cont +; CHECK: retl + + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + +attributes #0 = { nounwind readnone speculatable } + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!3, !4} +!llvm.module.flags = !{!52, !53} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "../llvm/test/CodeGen/X86/sink-local-value.ll", directory: "/") +!2 = !{} +!3 = !{i32 27} +!4 = !{i32 8} +!5 = distinct !DISubprogram(name: "simple", linkageName: "simple", scope: null, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, variables: !2) +!6 = !DISubroutineType(types: !2) +!7 = !DILocation(line: 1, column: 1, scope: !5) +!8 = !DILocation(line: 2, column: 1, scope: !5) +!9 = !DILocation(line: 3, column: 1, scope: !5) +!10 = distinct !DISubprogram(name: "simple_reg", linkageName: "simple_reg", scope: null, file: !1, 
line: 4, type: !6, isLocal: false, isDefinition: true, scopeLine: 4, isOptimized: true, unit: !0, variables: !2) +!11 = !DILocation(line: 4, column: 1, scope: !10) +!12 = !DILocation(line: 5, column: 1, scope: !10) +!13 = !DILocation(line: 6, column: 1, scope: !10) +!14 = distinct !DISubprogram(name: "phi_const", linkageName: "phi_const", scope: null, file: !1, line: 7, type: !6, isLocal: false, isDefinition: true, scopeLine: 7, isOptimized: true, unit: !0, variables: !15) +!15 = !{!16, !18} +!16 = !DILocalVariable(name: "1", scope: !14, file: !1, line: 7, type: !17) +!17 = !DIBasicType(name: "ty8", size: 8, encoding: DW_ATE_unsigned) +!18 = !DILocalVariable(name: "2", scope: !14, file: !1, line: 11, type: !19) +!19 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) +!20 = !DILocation(line: 7, column: 1, scope: !14) +!21 = !DILocation(line: 8, column: 1, scope: !14) +!22 = !DILocation(line: 9, column: 1, scope: !14) +!23 = !DILocation(line: 10, column: 1, scope: !14) +!24 = !DILocation(line: 11, column: 1, scope: !14) +!25 = !DILocation(line: 12, column: 1, scope: !14) +!26 = distinct !DISubprogram(name: "phi_const_cast", linkageName: "phi_const_cast", scope: null, file: !1, line: 13, type: !6, isLocal: false, isDefinition: true, scopeLine: 13, isOptimized: true, unit: !0, variables: !27) +!27 = !{!28, !29, !30, !31} +!28 = !DILocalVariable(name: "3", scope: !26, file: !1, line: 13, type: !17) +!29 = !DILocalVariable(name: "4", scope: !26, file: !1, line: 15, type: !19) +!30 = !DILocalVariable(name: "5", scope: !26, file: !1, line: 17, type: !19) +!31 = !DILocalVariable(name: "6", scope: !26, file: !1, line: 19, type: !19) +!32 = !DILocation(line: 13, column: 1, scope: !26) +!33 = !DILocation(line: 14, column: 1, scope: !26) +!34 = !DILocation(line: 15, column: 1, scope: !26) +!35 = !DILocation(line: 16, column: 1, scope: !26) +!36 = !DILocation(line: 17, column: 1, scope: !26) +!37 = !DILocation(line: 18, column: 1, scope: !26) +!38 = 
!DILocation(line: 19, column: 1, scope: !26) +!39 = !DILocation(line: 20, column: 1, scope: !26) +!40 = distinct !DISubprogram(name: "invoke_phi", linkageName: "invoke_phi", scope: null, file: !1, line: 21, type: !6, isLocal: false, isDefinition: true, scopeLine: 21, isOptimized: true, unit: !0, variables: !41) +!41 = !{!42, !44} +!42 = !DILocalVariable(name: "7", scope: !40, file: !1, line: 23, type: !43) +!43 = !DIBasicType(name: "ty64", size: 64, encoding: DW_ATE_unsigned) +!44 = !DILocalVariable(name: "8", scope: !40, file: !1, line: 26, type: !19) +!45 = !DILocation(line: 21, column: 1, scope: !40) +!46 = !DILocation(line: 22, column: 1, scope: !40) +!47 = !DILocation(line: 23, column: 1, scope: !40) +!48 = !DILocation(line: 24, column: 1, scope: !40) +!49 = !DILocation(line: 25, column: 1, scope: !40) +!50 = !DILocation(line: 26, column: 1, scope: !40) +!51 = !DILocation(line: 27, column: 1, scope: !40) +!52 = !{i32 2, !"Dwarf Version", i32 4} +!53 = !{i32 2, !"Debug Info Version", i32 3} Index: llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -1485,8 +1485,8 @@ ; ; X64-LABEL: test_mm_setcsr: ; X64: # %bb.0: -; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) +; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; X64-NEXT: ldmxcsr (%rax) ; X64-NEXT: retq %st = alloca i32, align 4 Index: llvm/trunk/test/CodeGen/X86/win32_sret.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/win32_sret.ll +++ llvm/trunk/test/CodeGen/X86/win32_sret.ll @@ -137,9 +137,9 @@ ; Load the address of the result and put it onto stack ; The this pointer goes to ECX. ; (through %ecx in the -O0 build). 
-; WIN32: leal {{[0-9]*}}(%esp), %e{{[a-d]}}x -; WIN32: {{leal [1-9]+\(%esp\)|movl %esp}}, %ecx -; WIN32: {{pushl %e[a-d]x|movl %e[a-d]x, \(%esp\)}} +; WIN32-DAG: leal {{[0-9]*}}(%esp), %e{{[a-d]}}x +; WIN32-DAG: {{leal [1-9]+\(%esp\)|movl %esp}}, %ecx +; WIN32-DAG: {{pushl %e[a-d]x|movl %e[a-d]x, \(%esp\)}} ; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ" ; WIN32: retl ret void @@ -154,21 +154,21 @@ ; LINUX-LABEL: test6_f: ; The %x argument is moved to %ecx. It will be the this pointer. -; WIN32: movl {{16|20}}(%esp), %ecx +; WIN32-DAG: movl {{16|20}}(%esp), %ecx ; The sret pointer is (%esp) -; WIN32: {{leal 4\(%esp\)|movl %esp}}, %eax -; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}} +; WIN32-DAG: {{leal 4\(%esp\)|movl %esp}}, %eax +; WIN32-DAG: {{pushl %eax|movl %eax, \(%esp\)}} ; The sret pointer is %ecx ; The %x argument is moved to (%esp). It will be the this pointer. -; MINGW_X86: {{leal 4\(%esp\)|movl %esp}}, %ecx -; MINGW_X86-NEXT: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}} +; MINGW_X86-DAG: {{leal 4\(%esp\)|movl %esp}}, %ecx +; MINGW_X86-DAG: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}} ; MINGW_X86-NEXT: calll _test6_g -; CYGWIN: {{leal 4\(%esp\)|movl %esp}}, %ecx -; CYGWIN-NEXT: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}} +; CYGWIN-DAG: {{leal 4\(%esp\)|movl %esp}}, %ecx +; CYGWIN-DAG: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}} ; CYGWIN-NEXT: calll _test6_g %tmp = alloca %struct.test6, align 4 Index: llvm/trunk/test/DebugInfo/COFF/lines-bb-start.ll =================================================================== --- llvm/trunk/test/DebugInfo/COFF/lines-bb-start.ll +++ llvm/trunk/test/DebugInfo/COFF/lines-bb-start.ll @@ -92,6 +92,7 @@ ; CHECK: LBB2_{{.*}}: # %if.end ; CHECK-NEXT: .cv_loc {{.*}} # t.c:5:3 ; CHECK: leal 4(%esp), %[[reg:[^ ]*]] +; CHECK: #DEBUG_VALUE: lea_dbg_value:v <- [DW_OP_deref] $[[reg]] ; CHECK: movl %[[reg]], (%esp) ; CHECK: calll _use_i32 Index: llvm/trunk/test/DebugInfo/Mips/delay-slot.ll 
=================================================================== --- llvm/trunk/test/DebugInfo/Mips/delay-slot.ll +++ llvm/trunk/test/DebugInfo/Mips/delay-slot.ll @@ -13,11 +13,9 @@ ; CHECK: Address Line Column File ISA Discriminator Flags ; CHECK: ------------------ ------ ------ ------ --- ------------- ------------- ; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt -; FIXME: The next address probably ought to be 0x0000000000000004 but there's -; a constant initialization before the prologue's end. -; CHECK: 0x0000000000000008 2 0 1 0 0 is_stmt prologue_end -; CHECK: 0x000000000000002c 3 0 1 0 0 is_stmt -; CHECK: 0x000000000000003c 4 0 1 0 0 is_stmt +; CHECK: 0x0000000000000004 2 0 1 0 0 is_stmt prologue_end +; CHECK: 0x0000000000000024 3 0 1 0 0 is_stmt +; CHECK: 0x0000000000000034 4 0 1 0 0 is_stmt ; CHECK: 0x0000000000000048 5 0 1 0 0 is_stmt ; CHECK: 0x0000000000000058 5 0 1 0 0 is_stmt end_sequence Index: llvm/trunk/test/DebugInfo/X86/debug-loc-asan.ll =================================================================== --- llvm/trunk/test/DebugInfo/X86/debug-loc-asan.ll +++ llvm/trunk/test/DebugInfo/X86/debug-loc-asan.ll @@ -14,8 +14,8 @@ ; The address of the (potentially now malloc'ed) alloca ends up ; in rdi, after which it is spilled to the stack. We record the ; spill OFFSET on the stack for checking the debug info below. -; CHECK: #DEBUG_VALUE: bar:y <- [DW_OP_deref] [$rdi+0] -; CHECK: movq %rdi, [[OFFSET:[0-9]+]](%rsp) +; CHECK: #DEBUG_VALUE: bar:y <- [DW_OP_deref] [$rcx+0] +; CHECK: movq %rcx, [[OFFSET:[0-9]+]](%rsp) ; CHECK-NEXT: [[START_LABEL:.Ltmp[0-9]+]] ; CHECK-NEXT: #DEBUG_VALUE: bar:y <- [DW_OP_plus_uconst [[OFFSET]], DW_OP_deref, DW_OP_deref] ; This location should be valid until the end of the function. @@ -26,13 +26,13 @@ ; CHECK: .Ldebug_loc{{[0-9]+}}: ; We expect two location ranges for the variable. 
-; First, its address is stored in %rdi: +; First, its address is stored in %rcx: ; CHECK: .quad .Lfunc_begin0-.Lfunc_begin0 ; CHECK-NEXT: .quad [[START_LABEL]]-.Lfunc_begin0 -; CHECK: DW_OP_breg5 +; CHECK: DW_OP_breg2 ; DWARF: DW_TAG_formal_parameter ; DWARF: DW_AT_location -; DWARF-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0, DW_OP_deref +; DWARF-NEXT: [{{.*}}, {{.*}}): DW_OP_breg2 RCX+0, DW_OP_deref ; Then it's addressed via %rsp: ; CHECK: .quad [[START_LABEL]]-.Lfunc_begin0 Index: llvm/trunk/test/DebugInfo/X86/prologue-stack.ll =================================================================== --- llvm/trunk/test/DebugInfo/X86/prologue-stack.ll +++ llvm/trunk/test/DebugInfo/X86/prologue-stack.ll @@ -11,6 +11,8 @@ ; CHECK: isel_line_test2: ; CHECK: {{subq|leaq}} {{.*}}, %rsp ; CHECK: .loc 1 5 3 prologue_end + ; CHECK: movl $400, %edi + ; CHECK: callq callme entry: %call = call i32 @callme(i32 400), !dbg !10 ret i32 0, !dbg !12