Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -78,19 +78,20 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator &InsertPt, + MachineBasicBlock::iterator &InsertAddr, + MachineBasicBlock::iterator &InsertStore, DebugLoc DL) { auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); unsigned SPAddr = MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); const auto *TII = MF.getSubtarget().getInstrInfo(); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr) + BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), SPAddr) .addExternalSymbol(SPSymbol); auto *MMO = new MachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, 4, 4); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), - WebAssembly::SP32) + BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32), + SrcReg) .addImm(0) .addReg(SPAddr) .addImm(2) // p2align @@ -108,7 +109,7 @@ if (I->getOpcode() == TII->getCallFrameDestroyOpcode() && needsSPWriteback(MF, *MF.getFrameInfo())) { DebugLoc DL = I->getDebugLoc(); - writeSPToMemory(WebAssembly::SP32, MF, MBB, I, DL); + writeSPToMemory(WebAssembly::SP32, MF, MBB, I, I, DL); } MBB.erase(I); } @@ -171,7 +172,7 @@ .addReg(WebAssembly::SP32); } if (StackSize && needsSPWriteback(MF, *MFI)) { - writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, DL); + writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL); } } @@ -192,18 +193,24 @@ // Restore the stack pointer. If we had fixed-size locals, add the offset // subtracted in the prolog. 
+ unsigned SPReg = 0; + MachineBasicBlock::iterator InsertAddr = InsertPt; if (StackSize) { unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) - .addImm(StackSize); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), - WebAssembly::SP32) + InsertAddr = + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(StackSize); + // In the epilog we don't need to write the result back to the SP32 physreg + // because it won't be used again. We can use a stackified register instead. + SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg) .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32) .addReg(OffsetReg); WFI->stackifyVReg(OffsetReg); + WFI->stackifyVReg(SPReg); + } else { + SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32; } - writeSPToMemory( - (!StackSize && hasFP(MF)) ? WebAssembly::FP32 : WebAssembly::SP32, MF, - MBB, InsertPt, DL); + writeSPToMemory(SPReg, MF, MBB, InsertAddr, InsertPt, DL); } Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -51,6 +51,51 @@ return Reserved; } +static bool isStackifiedVReg(const WebAssemblyFunctionInfo *WFI, + const MachineOperand& Op) { + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + return TargetRegisterInfo::isVirtualRegister(Reg) && + WFI->isVRegStackified(Reg); + } + return false; +} + +static bool canStackifyOperand(const MachineInstr& Inst) { + unsigned Op = Inst.getOpcode(); + return Op != TargetOpcode::PHI && + Op != TargetOpcode::INLINEASM && + Op != TargetOpcode::DBG_VALUE; +} + +// Determine if the FI sequence can be stackified, and if so, where the code can +// be inserted. 
If stackification is possible, returns true and adjusts II to +// point to the insertion point. +bool findInsertPt(const WebAssemblyFunctionInfo *WFI, MachineBasicBlock &MBB, + unsigned OperandNum, MachineBasicBlock::iterator &II) { + if (!canStackifyOperand(*II)) return false; + + MachineBasicBlock::iterator InsertPt(II); + int StackCount = 0; + // Operands are popped in reverse order, so any operands after FIOperand + // impose a constraint + for (unsigned i = OperandNum; i < II->getNumOperands(); i++) { + if (isStackifiedVReg(WFI, II->getOperand(i))) ++StackCount; + } + // Walk backwards, tracking stack depth. When it reaches 0 we have reached the + // top of the subtree. + while (StackCount) { + if (InsertPt == MBB.begin()) return false; + --InsertPt; + for (const auto &def : InsertPt->defs()) + if (isStackifiedVReg(WFI, def)) --StackCount; + for (const auto &use : InsertPt->explicit_uses()) + if (isStackifiedVReg(WFI, use)) ++StackCount; + } + II = InsertPt; + return true; +} + void WebAssemblyRegisterInfo::eliminateFrameIndex( MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger * /*RS*/) const { @@ -78,20 +123,34 @@ MI.getOperand(FIOperandNum) .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); } else { - // Otherwise create an i32.add SP, offset and make it the operand. + // Otherwise calculate the address auto &MRI = MF.getRegInfo(); const auto *TII = MF.getSubtarget().getInstrInfo(); unsigned FIRegOperand = WebAssembly::SP32; if (FrameOffset) { - FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32), - FIRegOperand) + // Create i32.add SP, offset and make it the operand. We want to stackify + // this sequence, but we need to preserve the LIFO expr stack ordering + // (i.e. we can't insert our code in between MI and any operands it + // pops before FIOperand). 
+ auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>(); + bool CanStackifyFI = findInsertPt(WFI, MBB, FIOperandNum, II); + + unsigned OffsetOp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), + OffsetOp) .addImm(FrameOffset); - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32), + if (CanStackifyFI) { + WFI->stackifyVReg(OffsetOp); + FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + WFI->stackifyVReg(FIRegOperand); + } else { + FIRegOperand = OffsetOp; + } + BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32), FIRegOperand) .addReg(WebAssembly::SP32) - .addReg(FIRegOperand); + .addReg(OffsetOp); } MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false); } Index: llvm/trunk/test/CodeGen/WebAssembly/byval.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/byval.ll +++ llvm/trunk/test/CodeGen/WebAssembly/byval.ll @@ -35,15 +35,15 @@ ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($0) ; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L4]] ; Pass a pointer to the stack slot to the function - ; CHECK-NEXT: i32.const [[L5:.+]]=, 12 - ; CHECK-NEXT: i32.add [[ARG:.+]]=, [[SP]], [[L5]] - ; CHECK-NEXT: call ext_byval_func@FUNCTION, [[L5]] + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12 + ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: call ext_byval_func@FUNCTION, $pop[[ARG]] call void @ext_byval_func(%SmallStruct* byval %ptr) ; Restore the stack - ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16 - ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L6]] ; CHECK-NEXT: i32.const $push[[L7:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), [[SP]] + ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16 + ; CHECK-NEXT: i32.add $push[[L8:.+]]=, [[SP]], $pop[[L6]] + ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), $pop[[L8]] ; CHECK-NEXT: return ret void } @@ -58,9 +58,9 @@ ; CHECK: 
i32.load $push[[L4:.+]]=, 0($0):p2align=3 ; CHECK-NEXT: i32.store {{.*}}=, 8([[SP]]):p2align=3, $pop[[L4]] ; Pass a pointer to the stack slot to the function - ; CHECK-NEXT: i32.const [[L5:.+]]=, 8 - ; CHECK-NEXT: i32.add [[ARG:.+]]=, [[SP]], [[L5]] - ; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, [[L5]] + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8 + ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, $pop[[ARG]] call void @ext_byval_func_align8(%SmallStruct* byval align 8 %ptr) ret void } Index: llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll +++ llvm/trunk/test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -61,8 +61,8 @@ ; CHECK-LABEL: frame_index: -; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop1, $pop0{{$}} -; CHECK: i32.call $discard=, memset@FUNCTION, $1, $pop3, $pop2{{$}} +; CHECK: i32.call $discard=, memset@FUNCTION, $pop12, $pop1, $pop0{{$}} +; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop3, $pop2{{$}} ; CHECK: return{{$}} define void @frame_index() { entry: Index: llvm/trunk/test/CodeGen/WebAssembly/userstack.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/userstack.ll +++ llvm/trunk/test/CodeGen/WebAssembly/userstack.ll @@ -22,10 +22,10 @@ ; CHECK: i32.const $push[[L0:.+]]=, 0 ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L0]] store i32 0, i32* %retval - ; CHECK: i32.const $push[[L5:.+]]=, 16 - ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]] - ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]] + ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16 + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] ret void } @@ 
-49,7 +49,7 @@ } ; CHECK-LABEL: allocarray: -; CHECK: .local i32, i32{{$}} +; CHECK: .local i32{{$}} define void @allocarray() { ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) @@ -59,10 +59,10 @@ ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]] %r = alloca [33 x i32] + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12 + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12 - ; CHECK-NEXT: i32.const [[L5:.+]]=, 12 - ; CHECK-NEXT: i32.add [[L5]]=, [[SP]], [[L5]] - ; CHECK-NEXT: i32.add $push[[L6:.+]]=, [[L5]], $pop[[L4]] + ; CHECK-NEXT: i32.add $push[[L6:.+]]=, $pop[[L7]], $pop[[L4]] ; CHECK-NEXT: i32.const $push[[L9:.+]]=, 1{{$}} ; CHECK-NEXT: i32.store $push[[L10:.+]]=, 12([[SP]]), $pop[[L9]]{{$}} ; CHECK-NEXT: i32.store $discard=, 0($pop3), $pop[[L10]]{{$}} @@ -71,10 +71,10 @@ %p2 = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 3 store i32 1, i32* %p2 - ; CHECK: i32.const $push[[L11:.+]]=, 144 - ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L11]] - ; CHECK-NEXT: i32.const $push[[L12:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), [[SP]] + ; CHECK: i32.const $push[[L12:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.const $push[[L11:.+]]=, 144 + ; CHECK-NEXT: i32.add $push[[L13:.+]]=, [[SP]], $pop[[L11]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), $pop[[L13]] ret void } @@ -86,18 +86,18 @@ %r = alloca i64 %r2 = alloca i64 ; %r is at SP+8 - ; CHECK: i32.const [[OFF:.+]]=, 8 - ; CHECK-NEXT: i32.add [[ARG1:.+]]=, [[SP]], [[OFF]] - ; CHECK-NEXT: call ext_func@FUNCTION, [[ARG1]] + ; CHECK: i32.const $push[[OFF:.+]]=, 8 + ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: call ext_func@FUNCTION, $pop[[ARG1]] call void @ext_func(i64* %r) ; %r2 is at SP+0, no add needed ; CHECK-NEXT: call ext_func@FUNCTION, [[SP]] call void @ext_func(i64* %r2) ; Use as a value, but in a store ; %buf is at SP+16 - ; 
CHECK: i32.const [[OFF:.+]]=, 16 - ; CHECK-NEXT: i32.add [[VAL:.+]]=, [[SP]], [[OFF]] - ; CHECK-NEXT: i32.store {{.*}}=, 0($0), [[VAL]] + ; CHECK: i32.const $push[[OFF:.+]]=, 16 + ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: i32.store {{.*}}=, 0($0), $pop[[VAL]] %gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0 store i8* %gep, i8** %addr ret void @@ -120,10 +120,10 @@ %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3 store i32 1, i32* %p2 call void @ext_func(i64* null); - ; CHECK: i32.const $push[[L5:.+]]=, 32 - ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]] - ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]] + ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32 + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] ret void } @@ -143,7 +143,7 @@ ; CHECK: call ext_func_i32@FUNCTION call void @ext_func_i32(i32* %r) ; CHECK: i32.const $push[[L3:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L3]]), [[FP]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L3]]), [[FP]] ret void } @@ -183,10 +183,10 @@ %r1 = alloca i32 %r = alloca i32, i32 %alloc store i32 0, i32* %r - ; CHECK: i32.const $push[[L5:.+]]=, 16 - ; CHECK-NEXT: i32.add [[SP]]=, [[FP]], $pop[[L5]] - ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]] + ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16 + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[FP]], $pop[[L5]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] ret void } @@ -198,9 +198,9 @@ ; CHECK: i32.const $push[[L1:.+]]=, 16 ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]] %addr = alloca i32 - ; CHECK: i32.const [[OFF:.+]]=, 12 - ; CHECK-NEXT: i32.add [[ADDR:.+]]=, 
[[SP]], [[OFF]] - ; CHECK-NEXT: copy_local [[COPY:.+]]=, [[ADDR]] + ; CHECK: i32.const $push[[OFF:.+]]=, 12 + ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: copy_local [[COPY:.+]]=, $pop[[ADDR]] br label %body body: %a = phi i32* [%addr, %entry], [%b, %body]