Index: llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -238,17 +238,25 @@ LLT ResTy = Res.getLLTTy(*getMRI()); LLT Op0Ty = Op0.getLLTTy(*getMRI()); - assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type"); - assert((ResTy.getElementType() == Op0Ty.getElementType()) && - "Different vector element types"); - assert((ResTy.getNumElements() > Op0Ty.getNumElements()) && - "Op0 has more elements"); + assert(ResTy.isVector() && "Res non vector type"); - auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); SmallVector Regs; - for (auto Op : Unmerge.getInstr()->defs()) - Regs.push_back(Op.getReg()); - Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0); + if (Op0Ty.isVector()) { + assert((ResTy.getElementType() == Op0Ty.getElementType()) && + "Different vector element types"); + assert((ResTy.getNumElements() > Op0Ty.getNumElements()) && + "Op0 has more elements"); + auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); + + for (auto Op : Unmerge.getInstr()->defs()) + Regs.push_back(Op.getReg()); + } else { + assert((ResTy.getSizeInBits() > Op0Ty.getSizeInBits()) && + "Op0 has more size"); + Regs.push_back(Op0.getReg()); + } + Register Undef = + buildUndef(Op0Ty.isVector() ? Op0Ty.getElementType() : Op0Ty).getReg(0); unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size(); for (unsigned i = 0; i < NumberOfPadElts; ++i) Regs.push_back(Undef); Index: llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -406,31 +406,22 @@ if (NewVT.isVector()) { if (OldLLT.isVector()) { if (NewLLT.getNumElements() > OldLLT.getNumElements()) { - // We don't handle VA types which are not exactly twice the - // size, but can easily be done in future. - if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) { - LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts"); - return false; - } - auto Undef = MIRBuilder.buildUndef({OldLLT}); + CurVReg = - MIRBuilder.buildMergeLikeInstr({NewLLT}, {CurVReg, Undef}) + MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg) .getReg(0); } else { // Just do a vector extend. CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}) .getReg(0); } - } else if (NewLLT.getNumElements() == 2) { - // We need to pad a <1 x S> type to <2 x S>. Since we don't have - // <1 x S> vector types in GISel we use a build_vector instead - // of a vector merge/concat. - auto Undef = MIRBuilder.buildUndef({OldLLT}); - CurVReg = - MIRBuilder - .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)}) - .getReg(0); - } else { + } else if (NewLLT.getNumElements() >= 2 && + NewLLT.getNumElements() <= 8) { + // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't + // have <1 x S> vector types in GISel we use a build_vector + // instead of a vector merge/concat. + CurVReg = MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg).getReg(0); + } else { LLVM_DEBUG(dbgs() << "Could not handle ret ty\n"); return false; } Index: llvm/test/CodeGen/AArch64/GlobalISel/ret-1x-vec.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/ret-1x-vec.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/ret-1x-vec.ll @@ -4,32 +4,101 @@ define <1 x float> @ret_v1f32(<1 x float> %v) { ; CHECK-LABEL: name: ret_v1f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[DEF]](s32) - ; CHECK: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[DEF]](s32) + ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 ret <1 x float> %v } define <1 x ptr> @ret_v1p0(<1 x ptr> %v) { ; CHECK-LABEL: name: ret_v1p0 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $d0 - ; CHECK: $d0 = COPY [[COPY]](p0) - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $d0 + ; CHECK-NEXT: $d0 = COPY [[COPY]](p0) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 ret <1 x ptr> %v } define <1 x ptr addrspace(1)> @ret_v1p1(<1 x ptr addrspace(1)> %v) { ; CHECK-LABEL: name: ret_v1p1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $d0 - ; CHECK: $d0 = COPY [[COPY]](p1) - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $d0 + ; CHECK-NEXT: $d0 = COPY [[COPY]](p1) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 ret <1 x ptr addrspace(1)> %v } + +define <1 x i16> @ret_v1i16(<1 x i16> %v) { + ; CHECK-LABEL: name: ret_v1i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16) + ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + ret <1 x i16> %v +} + +define <1 x i8> @ret_v1i8(<1 x i8> %v) { + ; CHECK-LABEL: name: ret_v1i8 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<8 x s8>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) + ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<8 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + ret <1 x i8> %v +} + +define <1 x i32> @ret_v1i32(<1 x i32> %v) { + ; CHECK-LABEL: name: ret_v1i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[DEF]](s32) + ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + ret <1 x i32> %v +} + +define <1 x i64> @ret_v1i64(<1 x i64> %v) { + ; CHECK-LABEL: name: ret_v1i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: $d0 = COPY [[COPY]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + ret <1 x i64> %v +} +define <1 x i1> @ret_v1i1(<1 x i1> %v) { + ; CHECK-LABEL: name: ret_v1i1 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ret <1 x i1> %v +} Index: llvm/test/CodeGen/AArch64/GlobalISel/vec-s16-param.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/vec-s16-param.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/vec-s16-param.ll @@ -8,9 +8,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK-NEXT: $d0 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV2]](s16), [[UV3]](s16), [[DEF]](s16), [[DEF]](s16) + ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 ret <2 x half> %v }