Index: llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -362,11 +362,6 @@ CallingConv::ID CC = F.getCallingConv(); for (unsigned i = 0; i < SplitEVTs.size(); ++i) { - if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) { - LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split"); - return false; - } - Register CurVReg = VRegs[i]; ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0}; setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F); @@ -375,7 +370,8 @@ // when widened using ANYEXT. We need to do it explicitly here. if (MRI.getType(CurVReg).getSizeInBits() == 1) { CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0); - } else { + } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) == + 1) { // Some types will need extending as specified by the CC. 
MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]); if (EVT(NewVT) != SplitEVTs[i]) { Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -1,5 +1,3 @@ -; RUN: not --crash llc -O0 -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR -; RUN: llc -O0 -global-isel -global-isel-abort=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=FALLBACK ; RUN: llc -O0 -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o %t.out 2> %t.err ; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-OUT < %t.out ; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-ERR < %t.err @@ -15,23 +13,6 @@ ; BIG-ENDIAN: unable to translate in big endian mode -; We use __fixunstfti as the common denominator for __fixunstfti on Linux and -; ___fixunstfti on iOS -; ERROR: unable to translate instruction: ret -; FALLBACK: ldr q0, -; FALLBACK-NEXT: bl __fixunstfti -; -; FALLBACK-WITH-REPORT-ERR: unable to translate instruction: ret -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ABIi128 -; FALLBACK-WITH-REPORT-OUT-LABEL: ABIi128: -; FALLBACK-WITH-REPORT-OUT: ldr q0, -; FALLBACK-WITH-REPORT-OUT-NEXT: bl __fixunstfti -define i128 @ABIi128(i128 %arg1) { - %farg1 = bitcast i128 %arg1 to fp128 - %res = fptoui fp128 %farg1 to i128 - ret i128 %res -} - ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %3:_(<3 x s32>), %4:_(p0) :: (store (<3 x s32>) into %ir.addr + 16, align 16, basealign 32) (in function: odd_vector) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector: @@ -41,15 +22,6 @@ ret void } - ; AArch64 was asserting instead of 
returning an invalid mapping for unknown - ; sizes. -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to translate instruction: ret: ' ret i128 undef' (in function: sequence_sizes) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for sequence_sizes -; FALLBACK-WITH-REPORT-LABEL: sequence_sizes: -define i128 @sequence_sizes([8 x i8] %in) { - ret i128 undef -} - ; Make sure we don't mess up metadata arguments. declare void @llvm.write_register.i64(metadata, i64) Index: llvm/test/CodeGen/AArch64/GlobalISel/translate-ret.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/translate-ret.ll @@ -0,0 +1,45 @@ +; RUN: llc -mtriple=arm64-apple-ios %s -o - -global-isel -global-isel-abort=1 -stop-after=irtranslator | FileCheck %s + +define i128 @func_i128(i128* %ptr) { +; CHECK-LABEL: name: func_i128 +; CHECK: [[PTR:%.*]]:_(p0) = COPY $x0 +; CHECK: [[VAL:%.*]]:_(s128) = G_LOAD [[PTR]] +; CHECK: [[LO:%.*]]:_(s64), [[HI:%.*]]:_(s64) = G_UNMERGE_VALUES [[VAL]] +; CHECK: $x0 = COPY [[LO]] +; CHECK: $x1 = COPY [[HI]] +; CHECK: RET_ReallyLR + + %val = load i128, i128* %ptr + ret i128 %val +} + +define <8 x float> @func_v8f32(<8 x float>* %ptr) { +; CHECK-LABEL: name: func_v8f32 +; CHECK: [[PTR:%.*]]:_(p0) = COPY $x0 +; CHECK: [[VAL:%.*]]:_(<8 x s32>) = G_LOAD [[PTR]] +; CHECK: [[LO:%.*]]:_(<4 x s32>), [[HI:%.*]]:_(<4 x s32>) = G_UNMERGE_VALUES [[VAL]] +; CHECK: $q0 = COPY [[LO]] +; CHECK: $q1 = COPY [[HI]] +; CHECK: RET_ReallyLR + + %val = load <8 x float>, <8 x float>* %ptr + ret <8 x float> %val +} + +; A bit weird, but s0-s5 is what SDAG does too. 
+define <6 x float> @func_v6f32(<6 x float>* %ptr) { +; CHECK-LABEL: name: func_v6f32 +; CHECK: [[PTR:%.*]]:_(p0) = COPY $x0 +; CHECK: [[VAL:%.*]]:_(<6 x s32>) = G_LOAD [[PTR]] +; CHECK: [[V1:%.*]]:_(s32), [[V2:%.*]]:_(s32), [[V3:%.*]]:_(s32), [[V4:%.*]]:_(s32), [[V5:%.*]]:_(s32), [[V6:%.*]]:_(s32) = G_UNMERGE_VALUES [[VAL]] +; CHECK: $s0 = COPY [[V1]] +; CHECK: $s1 = COPY [[V2]] +; CHECK: $s2 = COPY [[V3]] +; CHECK: $s3 = COPY [[V4]] +; CHECK: $s4 = COPY [[V5]] +; CHECK: $s5 = COPY [[V6]] +; CHECK: RET_ReallyLR + + %val = load <6 x float>, <6 x float>* %ptr + ret <6 x float> %val +} Index: llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -259,9 +259,10 @@ ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: fmov x8, d0 ; CHECK-NEXT: asr x1, x8, #63 -; CHECK-NEXT: mov.d v0[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret + ; x0 and x1 are the real return registers; SDAG also writes v0 for unknown reasons, so the mov.d check below is optional. +; CHECK: {{(mov.d v0[1], x1)?}} +; CHECK: fmov x0, d0 +; CHECK: ret ; %res = sext <1 x i64> %arg to <1 x i128> ret <1 x i128> %res