Index: llvm/trunk/lib/Target/X86/X86CallLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallLowering.h
+++ llvm/trunk/lib/Target/X86/X86CallLowering.h
@@ -34,6 +34,15 @@
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                             ArrayRef<unsigned> VRegs) const override;
 
+private:
+  /// A function of this type is used to perform value split action.
+  typedef std::function<void(ArrayRef<unsigned>, ArrayRef<uint64_t>)>
+      SplitArgTy;
+
+  void splitToValueTypes(const ArgInfo &OrigArgInfo,
+                         SmallVectorImpl<ArgInfo> &SplitArgs,
+                         const DataLayout &DL, MachineRegisterInfo &MRI,
+                         SplitArgTy SplitArg) const;
 };
 } // End of namespace llvm;
 #endif
Index: llvm/trunk/lib/Target/X86/X86CallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86CallLowering.cpp
@@ -35,17 +35,94 @@
 X86CallLowering::X86CallLowering(const X86TargetLowering &TLI)
     : CallLowering(&TLI) {}
 
+void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+                                        SmallVectorImpl<ArgInfo> &SplitArgs,
+                                        const DataLayout &DL,
+                                        MachineRegisterInfo &MRI,
+                                        SplitArgTy PerformArgSplit) const {
+
+  const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
+  LLVMContext &Context = OrigArg.Ty->getContext();
+  EVT VT = TLI.getValueType(DL, OrigArg.Ty);
+  unsigned NumParts = TLI.getNumRegisters(Context, VT);
+
+  if (NumParts == 1) {
+    SplitArgs.push_back(OrigArg);
+    return;
+  }
+
+  SmallVector<uint64_t, 4> BitOffsets;
+  SmallVector<unsigned, 8> SplitRegs;
+
+  EVT PartVT = TLI.getRegisterType(Context, VT);
+  Type *PartTy = PartVT.getTypeForEVT(Context);
+
+  for (unsigned i = 0; i < NumParts; ++i) {
+    ArgInfo Info = ArgInfo{MRI.createGenericVirtualRegister(LLT{*PartTy, DL}),
+                           PartTy, OrigArg.Flags};
+    SplitArgs.push_back(Info);
+    BitOffsets.push_back(PartVT.getSizeInBits() * i);
+    SplitRegs.push_back(Info.Reg);
+  }
+
+  PerformArgSplit(SplitRegs, BitOffsets);
+}
+
+namespace {
+struct FuncReturnHandler : public CallLowering::ValueHandler {
+  FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                    MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
+      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+
+  unsigned getStackAddress(uint64_t Size, int64_t Offset,
+                           MachinePointerInfo &MPO) override {
+    llvm_unreachable("Don't know how to get a stack address yet");
+  }
+
+  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+                        CCValAssign &VA) override {
+    MIB.addUse(PhysReg, RegState::Implicit);
+    unsigned ExtReg = extendRegister(ValVReg, VA);
+    MIRBuilder.buildCopy(PhysReg, ExtReg);
+  }
+
+  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+                            MachinePointerInfo &MPO, CCValAssign &VA) override {
+    llvm_unreachable("Don't know how to assign a value to an address yet");
+  }
+
+  MachineInstrBuilder &MIB;
+};
+} // End anonymous namespace.
+
 bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                   const Value *Val, unsigned VReg) const {
-  // TODO: handle functions returning non-void values.
-  if (Val)
-    return false;
-
-  // silence unused-function warning, remove after the function implementation.
-  (void)RetCC_X86;
+  assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
+
+  auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
 
-  MIRBuilder.buildInstr(X86::RET).addImm(0);
+  if (VReg) {
+    MachineFunction &MF = MIRBuilder.getMF();
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    auto &DL = MF.getDataLayout();
+    const Function &F = *MF.getFunction();
+
+    ArgInfo OrigArg{VReg, Val->getType()};
+    setArgFlags(OrigArg, AttributeSet::ReturnIndex, DL, F);
+
+    SmallVector<ArgInfo, 8> SplitArgs;
+    splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+                      [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
+                        MIRBuilder.buildExtract(Regs, Offsets, VReg);
+                      });
+
+    FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
+    if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+      return false;
+  }
+
+  MIRBuilder.insertInstr(MIB);
   return true;
 }
@@ -98,18 +175,32 @@
   if (F.isVarArg())
     return false;
 
-  auto DL = MIRBuilder.getMF().getDataLayout();
+  MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  auto DL = MF.getDataLayout();
 
-  SmallVector<ArgInfo, 8> ArgInfos;
+  SmallVector<ArgInfo, 8> SplitArgs;
   unsigned Idx = 0;
   for (auto &Arg : F.getArgumentList()) {
-    ArgInfo AInfo(VRegs[Idx], Arg.getType());
-    setArgFlags(AInfo, Idx + 1, DL, F);
-    ArgInfos.push_back(AInfo);
+    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
+    setArgFlags(OrigArg, Idx + 1, DL, F);
+    splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+                      [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
+                        MIRBuilder.buildSequence(VRegs[Idx], Regs, Offsets);
+                      });
     Idx++;
   }
 
-  FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(),
-                              CC_X86, DL);
-  return handleAssignments(MIRBuilder, ArgInfos, ArgHandler);
+  MachineBasicBlock &MBB = MIRBuilder.getMBB();
+  if (!MBB.empty())
+    MIRBuilder.setInstr(*MBB.begin());
+
+  FormalArgHandler Handler(MIRBuilder, MRI, CC_X86, DL);
+  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+    return false;
+
+  // Move back to the end of the basic block.
+  MIRBuilder.setMBB(MBB);
+
+  return true;
 }
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -5,8 +5,8 @@
 @a7_8bit = external global i8
 @a8_8bit = external global i8
 
-define void @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4,
-                            i8 %arg5, i8 %arg6, i8 %arg7, i8 %arg8) {
+define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4,
+                          i8 %arg5, i8 %arg6, i8 %arg7, i8 %arg8) {
 
 ; ALL-LABEL: name: test_i8_args_8
 
@@ -57,19 +57,22 @@
 ; ALL-NEXT: G_STORE [[ARG1]](s8), [[GADDR_A1]](p0) :: (store 1 into @a1_8bit)
 ; ALL-NEXT: G_STORE [[ARG7]](s8), [[GADDR_A7]](p0) :: (store 1 into @a7_8bit)
 ; ALL-NEXT: G_STORE [[ARG8]](s8), [[GADDR_A8]](p0) :: (store 1 into @a8_8bit)
+; ALL-NEXT: %al = COPY [[ARG1]](s8)
+; ALL-NEXT: RET 0, implicit %al
+
 entry:
   store i8 %arg1, i8* @a1_8bit
   store i8 %arg7, i8* @a7_8bit
   store i8 %arg8, i8* @a8_8bit
-  ret void
+  ret i8 %arg1
 }
 
 @a1_32bit = external global i32
 @a7_32bit = external global i32
 @a8_32bit = external global i32
 
-define void @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
-                             i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8) {
+define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
+                            i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8) {
 
 ; ALL-LABEL: name: test_i32_args_8
 
@@ -120,9 +123,159 @@
 ; ALL-NEXT: G_STORE [[ARG1]](s32), [[GADDR_A1]](p0) :: (store 4 into @a1_32bit)
 ; ALL-NEXT: G_STORE [[ARG7]](s32), [[GADDR_A7]](p0) :: (store 4 into @a7_32bit)
 ; ALL-NEXT: G_STORE [[ARG8]](s32), [[GADDR_A8]](p0) :: (store 4 into @a8_32bit)
+; ALL-NEXT: %eax = COPY [[ARG1]](s32)
+; ALL-NEXT: RET 0, implicit %eax
+
 entry:
   store i32 %arg1, i32* @a1_32bit
   store i32 %arg7, i32* @a7_32bit
   store i32 %arg8, i32* @a8_32bit
-  ret void
+  ret i32 %arg1
+}
+
+@a1_64bit = external global i64
+@a7_64bit = external global i64
+@a8_64bit = external global i64
+
+define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4,
+                            i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8) {
+
+; ALL-LABEL: name: test_i64_args_8
+; X64: fixedStack:
+; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false
+; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false
+; X64: liveins: %rcx, %rdi, %rdx, %rsi, %r8, %r9
+; X64: [[ARG1:%[0-9]+]](s64) = COPY %rdi
+; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rsi
+; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rdx
+; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rcx
+; X64-NEXT: %{{[0-9]+}}(s64) = COPY %r8
+; X64-NEXT: %{{[0-9]+}}(s64) = COPY %r9
+; X64-NEXT: [[ARG7_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
+; X64-NEXT: [[ARG7:%[0-9]+]](s64) = G_LOAD [[ARG7_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0)
+; X64-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
+; X64-NEXT: [[ARG8:%[0-9]+]](s64) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0)
+
+; X32: fixedStack:
+; X32: id: [[STACK60:[0-9]+]], offset: 60, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+; X32: id: [[STACK56:[0-9]+]], offset: 56, size: 4, alignment: 8, isImmutable: true, isAliased: false }
+; X32: id: [[STACK52:[0-9]+]], offset: 52, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+; X32: id: [[STACK48:[0-9]+]], offset: 48, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK44:[0-9]+]], offset: 44, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+; X32: id: [[STACK40:[0-9]+]], offset: 40, size: 4, alignment: 8, isImmutable: true, isAliased: false }
+; X32: id: [[STACK36:[0-9]+]], offset: 36, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+; X32: id: [[STACK32:[0-9]+]], offset: 32, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false }
+; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false }
+; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+
+; X32: [[ARG1L_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
+; X32-NEXT: [[ARG1L:%[0-9]+]](s32) = G_LOAD [[ARG1L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
+; X32-NEXT: [[ARG1H_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
+; X32-NEXT: [[ARG1H:%[0-9]+]](s32) = G_LOAD [[ARG1H_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK4]], align 0)
+; X32-NEXT: %{{[0-9]+}}(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
+; X32-NEXT: %{{[0-9]+}}(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0)
+; X32-NEXT: %{{[0-9]+}}(p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]]
+; X32-NEXT: %{{[0-9]+}}(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK12]], align 0)
+; X32-NEXT: %{{[0-9]+}}(p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]]
+; X32-NEXT: %{{[0-9]+}}(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK16]], align 0)
+; X32-NEXT: %{{[0-9]+}}(p0) = G_FRAME_INDEX %fixed-stack.[[STACK20]]
+; X32-NEXT: %{{[0-9]+}}(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK20]], align 0)
+; X32-NEXT: %{{[0-9]+}}(p0) = G_FRAME_INDEX %fixed-stack.[[STACK24]]
+; X32-NEXT: %{{[0-9]+}}(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK24]], align 0)
+; X32-NEXT: %{{[0-9]+}}(p0) = G_FRAME_INDEX %fixed-stack.[[STACK28]]
+; X32-NEXT: %{{[0-9]+}}(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK28]], align 0)
+; X32-NEXT: %{{[0-9]+}}(p0) = G_FRAME_INDEX %fixed-stack.[[STACK32]]
+; X32-NEXT: %{{[0-9]+}}(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK32]], align 0)
+; X32-NEXT: %{{[0-9]+}}(p0) = G_FRAME_INDEX %fixed-stack.[[STACK36]]
+; X32-NEXT: %{{[0-9]+}}(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK36]], align 0)
+; X32-NEXT: %{{[0-9]+}}(p0) = G_FRAME_INDEX %fixed-stack.[[STACK40]]
+; X32-NEXT: %{{[0-9]+}}(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK40]], align 0)
+; X32-NEXT: %{{[0-9]+}}(p0) = G_FRAME_INDEX %fixed-stack.[[STACK44]]
+; X32-NEXT: %{{[0-9]+}}(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK44]], align 0)
+; X32-NEXT: [[ARG7L_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK48]]
+; X32-NEXT: [[ARG7L:%[0-9]+]](s32) = G_LOAD [[ARG7L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK48]], align 0)
+; X32-NEXT: [[ARG7H_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK52]]
+; X32-NEXT: [[ARG7H:%[0-9]+]](s32) = G_LOAD [[ARG7H_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK52]], align 0)
+; X32-NEXT: [[ARG8L_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK56]]
+; X32-NEXT: [[ARG8L:%[0-9]+]](s32) = G_LOAD [[ARG8L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK56]], align 0)
+; X32-NEXT: [[ARG8H_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK60]]
+; X32-NEXT: [[ARG8H:%[0-9]+]](s32) = G_LOAD [[ARG8H_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK60]], align 0)
+; X32-NEXT: [[ARG1:%[0-9]+]](s64) = G_SEQUENCE [[ARG1L:%[0-9]+]](s32), 0, [[ARG1H:%[0-9]+]](s32), 32
+; X32-NEXT: %{{[0-9]+}}(s64) = G_SEQUENCE %{{[0-9]+}}(s32), 0, %{{[0-9]+}}(s32), 32
+; X32-NEXT: %{{[0-9]+}}(s64) = G_SEQUENCE %{{[0-9]+}}(s32), 0, %{{[0-9]+}}(s32), 32
+; X32-NEXT: %{{[0-9]+}}(s64) = G_SEQUENCE %{{[0-9]+}}(s32), 0, %{{[0-9]+}}(s32), 32
+; X32-NEXT: %{{[0-9]+}}(s64) = G_SEQUENCE %{{[0-9]+}}(s32), 0, %{{[0-9]+}}(s32), 32
+; X32-NEXT: %{{[0-9]+}}(s64) = G_SEQUENCE %{{[0-9]+}}(s32), 0, %{{[0-9]+}}(s32), 32
+; X32-NEXT: [[ARG7:%[0-9]+]](s64) = G_SEQUENCE [[ARG7L:%[0-9]+]](s32), 0, [[ARG7H:%[0-9]+]](s32), 32
+; X32-NEXT: [[ARG8:%[0-9]+]](s64) = G_SEQUENCE [[ARG8L:%[0-9]+]](s32), 0, [[ARG8H:%[0-9]+]](s32), 32
+
+; ALL-NEXT: [[GADDR_A1:%[0-9]+]](p0) = G_GLOBAL_VALUE @a1_64bit
+; ALL-NEXT: [[GADDR_A7:%[0-9]+]](p0) = G_GLOBAL_VALUE @a7_64bit
+; ALL-NEXT: [[GADDR_A8:%[0-9]+]](p0) = G_GLOBAL_VALUE @a8_64bit
+; ALL-NEXT: G_STORE [[ARG1]](s64), [[GADDR_A1]](p0) :: (store 8 into @a1_64bit
+; ALL-NEXT: G_STORE [[ARG7]](s64), [[GADDR_A7]](p0) :: (store 8 into @a7_64bit
+; ALL-NEXT: G_STORE [[ARG8]](s64), [[GADDR_A8]](p0) :: (store 8 into @a8_64bit
+
+; X64-NEXT: %rax = COPY [[ARG1]](s64)
+; X64-NEXT: RET 0, implicit %rax
+
+; X32-NEXT: [[RETL:%[0-9]+]](s32), [[RETH:%[0-9]+]](s32) = G_EXTRACT [[ARG1:%[0-9]+]](s64), 0, 32
+; X32-NEXT: %eax = COPY [[RETL:%[0-9]+]](s32)
+; X32-NEXT: %edx = COPY [[RETH:%[0-9]+]](s32)
+; X32-NEXT: RET 0, implicit %eax, implicit %edx
+
+entry:
+  store i64 %arg1, i64* @a1_64bit
+  store i64 %arg7, i64* @a7_64bit
+  store i64 %arg8, i64* @a8_64bit
+  ret i64 %arg1
+}
+
+define float @test_float_args(float %arg1, float %arg2) {
+; ALL-LABEL: name: test_float_args
+
+; X64: liveins: %xmm0, %xmm1
+; X64: [[ARG1:%[0-9]+]](s32) = COPY %xmm0
+; X64-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %xmm1
+; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](s32)
+; X64-NEXT: RET 0, implicit %xmm0
+
+; X32: fixedStack:
+; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
+; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
+; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
+; X32-NEXT: [[ARG2:%[0-9]+]](s32) = G_LOAD [[ARG2_ADDR:%[0-9]+]](p0) :: (invariant load 4 from %fixed-stack.[[STACK4]], align 0)
+; X32-NEXT: %fp0 = COPY [[ARG2:%[0-9]+]](s32)
+; X32-NEXT: RET 0, implicit %fp0
+
+  ret float %arg2
+}
+
+define double @test_double_args(double %arg1, double %arg2) {
+; ALL-LABEL: name: test_double_args
+; X64: liveins: %xmm0, %xmm1
+; X64: [[ARG1:%[0-9]+]](s64) = COPY %xmm0
+; X64-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %xmm1
+; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](s64)
+; X64-NEXT: RET 0, implicit %xmm0
+
+; X32: fixedStack:
+; X32: id: [[STACK4:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false }
+; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false }
+; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
+; X32-NEXT: [[ARG1:%[0-9]+]](s64) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0)
+; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
+; X32-NEXT: [[ARG2:%[0-9]+]](s64) = G_LOAD [[ARG2_ADDR:%[0-9]+]](p0) :: (invariant load 8 from %fixed-stack.[[STACK4]], align 0)
+; X32-NEXT: %fp0 = COPY [[ARG2:%[0-9]+]](s64)
+; X32-NEXT: RET 0, implicit %fp0
+
+  ret double %arg2
 }
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll
@@ -1,35 +1,25 @@
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
 
-@a1_64bit = external global i64
-@a7_64bit = external global i64
-@a8_64bit = external global i64
+define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) {
+; X64: name: test_v4i32_args
+; X64: liveins: %xmm0, %xmm1
+; X64: [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0
+; X64-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1
+; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>)
+; X64-NEXT: RET 0, implicit %xmm0
+  ret <4 x i32> %arg2
+}
+
+define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) {
+; X64: name: test_v8i32_args
+; X64: liveins: %xmm0, %xmm1
+; X64: [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0
+; X64-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1
+; X64-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_SEQUENCE [[ARG1L:%[0-9]+]](<4 x s32>), 0, [[ARG1H:%[0-9]+]](<4 x s32>), 128
+; X64-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_EXTRACT [[ARG1:%[0-9]+]](<8 x s32>), 0, 128
+; X64-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>)
+; X64-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>)
+; X64-NEXT: RET 0, implicit %xmm0, implicit %xmm1
-define void @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4,
-                             i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8) {
-; X64-LABEL: name: test_i64_args_8
-; X64: fixedStack:
-; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false
-; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false
-; X64: liveins: %rcx, %rdi, %rdx, %rsi, %r8, %r9
-; X64: [[ARG1:%[0-9]+]](s64) = COPY %rdi
-; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rsi
-; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rdx
-; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rcx
-; X64-NEXT: %{{[0-9]+}}(s64) = COPY %r8
-; X64-NEXT: %{{[0-9]+}}(s64) = COPY %r9
-; X64-NEXT: [[ARG7_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
-; X64-NEXT: [[ARG7:%[0-9]+]](s64) = G_LOAD [[ARG7_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0)
-; X64-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
-; X64-NEXT: [[ARG8:%[0-9]+]](s64) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0)
-; X64-NEXT: [[GADDR_A1:%[0-9]+]](p0) = G_GLOBAL_VALUE @a1_64bit
-; X64-NEXT: [[GADDR_A7:%[0-9]+]](p0) = G_GLOBAL_VALUE @a7_64bit
-; X64-NEXT: [[GADDR_A8:%[0-9]+]](p0) = G_GLOBAL_VALUE @a8_64bit
-; X64-NEXT: G_STORE [[ARG1]](s64), [[GADDR_A1]](p0) :: (store 8 into @a1_64bit)
-; X64-NEXT: G_STORE [[ARG7]](s64), [[GADDR_A7]](p0) :: (store 8 into @a7_64bit)
-; X64-NEXT: G_STORE [[ARG8]](s64), [[GADDR_A8]](p0) :: (store 8 into @a8_64bit)
-entry:
-  store i64 %arg1, i64* @a1_64bit
-  store i64 %arg7, i64* @a7_64bit
-  store i64 %arg8, i64* @a8_64bit
-  ret void
+  ret <8 x i32> %arg1
 }