Index: lib/Target/X86/X86CallFrameOptimization.cpp
===================================================================
--- lib/Target/X86/X86CallFrameOptimization.cpp
+++ lib/Target/X86/X86CallFrameOptimization.cpp
@@ -129,6 +129,7 @@
   MachineRegisterInfo *MRI;
   unsigned SlotSize;
   unsigned Log2SlotSize;
+  const uint32_t *Win64RegMask;
 };
 
 } // end anonymous namespace
@@ -243,6 +244,16 @@
   assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
   Log2SlotSize = Log2_32(SlotSize);
 
+  // Locate the CSR_Win64 register mask by name. The names are C strings, so
+  // compare their contents; pointer equality is not guaranteed to hold.
+  const auto RegMaskNames = RegInfo.getRegMaskNames();
+  const auto Win64RegMaskName = find_if(RegMaskNames, [](const char *Name) {
+    return StringRef(Name) == "CSR_Win64";
+  });
+  assert(Win64RegMaskName != RegMaskNames.end() && "Was CSR_Win64 renamed?");
+  Win64RegMask = RegInfo.getRegMasks()[std::distance(RegMaskNames.begin(),
+                                                     Win64RegMaskName)];
+
   if (skipFunction(MF.getFunction()) || !isLegal(MF))
     return false;
 
@@ -387,15 +398,37 @@
   // The COPY can be located anywhere between the call-frame setup
   // instruction and its first use. We use the call instruction as a boundary
   // because it is usually cheaper to check if an instruction is a call than
-  // checking if an instruction uses a register.
-  for (auto J = I; !J->isCall(); ++J)
-    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
-        J->getOperand(1).getReg() == StackPtr) {
+  // checking if an instruction uses a register. This look-ahead is an
+  // opportunity to locate the call instruction and check if it is suitable for
+  // the transformation.
+  auto J = I;
+  for (; !J->isCall(); ++J) {
+    if (J->isCopy() && J->getOperand(0).isReg() &&
+        !RegInfo.isPhysicalRegister(J->getOperand(0).getReg()) &&
+        J->getOperand(1).isReg() && J->getOperand(1).getReg() == StackPtr) {
+      assert(StackPtrCopyInst == MBB.end() &&
+             "More than one stack-pointer copy?!");
       StackPtrCopyInst = J;
-      Context.SPCopy = &*J++;
+      // J is advanced by the loop header only; do not step it here.
+      Context.SPCopy = &*J;
       StackPtr = Context.SPCopy->getOperand(0).getReg();
-      break;
     }
+    assert(std::next(J) != MBB.end() && "Did not see any call");
+  }
+  auto TheCall = J;
+
+  // This transform is not legal for Win64. We already checked that MF is not
+  // win64cc, but no transform may be applied to a win64cc call site either.
+  // There is no straightforward way to check the callee's CC, so we resort to
+  // checking the register mask for the Win64-specific entry.
+  // FIXME: Perform a more robust check on the call for win64cc.
+  auto O = find_if(TheCall->operands(), [](const MachineOperand &Op) {
+    return Op.getType() == MachineOperand::MO_RegisterMask;
+  });
+  assert(O != TheCall->operands_end() && "Call instruction has no RegisterMask operand");
+  // Bail if call is win64cc.
+  if (O->getRegMask() == Win64RegMask)
+    return;
 
   // Scan the call setup sequence for the pattern we're looking for.
   // We only handle a simple case - a sequence of store instructions that
@@ -457,15 +486,13 @@
     }
   }
 
-  --I;
-
   // We now expect the end of the sequence. If we stopped early,
   // or reached the end of the block without finding a call, bail.
-  if (I == MBB.end() || !I->isCall())
+  if (std::prev(I) != TheCall)
     return;
 
-  Context.Call = &*I;
-  if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
+  Context.Call = &*TheCall;
+  if (I == MBB.end() || I->getOpcode() != TII->getCallFrameDestroyOpcode())
     return;
 
   // Now, go through the vector, and see that we don't have any gaps,
Index: test/CodeGen/X86/movtopush.ll
===================================================================
--- test/CodeGen/X86/movtopush.ll
+++ test/CodeGen/X86/movtopush.ll
@@ -532,3 +532,25 @@
   tail call void @eightparams64(i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 0, i64 -1)
   ret void
 }
+
+
+%struct.anon = type { %struct.anon.0 }
+%struct.anon.0 = type { %struct.anon.1 }
+%struct.anon.1 = type { i16, %struct.anon.2 }
+%struct.anon.2 = type { %struct.anon.3 }
+%struct.anon.3 = type { i8*, i32 }
+
+@d = common global %struct.anon zeroinitializer, align 8
+@ff = common local_unnamed_addr global i32 0, align 4
+@e = common local_unnamed_addr global i32 0, align 4
+
+; PR35814: bail out gracefully in the presence of win64cc call sites.
+define void @pr35814() local_unnamed_addr {
+entry:
+  %0 = load i32, i32* @ff, align 4
+  %1 = load i32, i32* @e, align 4
+  tail call win64cc void bitcast (void (...)* @g to void (%struct.anon*, i32, i32, %struct.anon*, %struct.anon*)*)(%struct.anon* byval nonnull align 8 @d, i32 %0, i32 %1, %struct.anon* byval nonnull align 8 @d, %struct.anon* byval nonnull align 8 @d) #2
+  ret void
+}
+
+declare win64cc void @g(...) local_unnamed_addr