diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -483,9 +483,6 @@
       continue;
     }
 
-    // FIXME: Pack registers if we have more than one.
-    Register ArgReg = Args[i].Regs[0];
-
     EVT OrigVT = EVT::getEVT(Args[i].Ty);
     EVT VAVT = VA.getValVT();
     const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
@@ -494,10 +491,12 @@
     // Expected to be multiple regs for a single incoming arg.
     // There should be Regs.size() ArgLocs per argument.
     unsigned NumArgRegs = Args[i].Regs.size();
-
+    MachineRegisterInfo &MRI = MF.getRegInfo();
     assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
            "Too many regs for number of args");
     for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
+      Register ArgReg = Args[i].Regs[Part];
+      LLT ArgRegTy = MRI.getType(ArgReg);
       // There should be Regs.size() ArgLocs per argument.
       VA = ArgLocs[j + Part];
       if (VA.isMemLoc()) {
@@ -538,8 +537,7 @@
         }
 
         // This ArgLoc covers multiple pieces, so we need to split it.
-        Register NewReg =
-            MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
+        Register NewReg = MRI.createGenericVirtualRegister(VATy);
         Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
         // If it's a vector type, we either need to truncate the elements
         // or do an unmerge to get the lower block of elements.
@@ -554,12 +552,13 @@
           SmallVector DstRegs = {ArgReg};
           unsigned NumParts = VATy.getNumElements() /
                               OrigVT.getVectorNumElements();
-          for (unsigned Idx = 0; Idx < NumParts-1; ++Idx)
-            DstRegs.push_back(
-                MIRBuilder.getMRI()->createGenericVirtualRegister(OrigTy));
+          for (unsigned Idx = 0; Idx < NumParts - 1; ++Idx)
+            DstRegs.push_back(MRI.createGenericVirtualRegister(OrigTy));
           MIRBuilder.buildUnmerge(DstRegs, {NewReg});
-        } else {
+        } else if (VATy.getScalarSizeInBits() > ArgRegTy.getScalarSizeInBits()) {
           MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
+        } else {
+          MIRBuilder.buildCopy(ArgReg, NewReg);
         }
       }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
 
-define i8 @v1s8_add(<1 x i8> %a0) nounwind {
+define i8 @v1s8_add(<1 x i8> %a0) {
   ; CHECK-LABEL: name: v1s8_add
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK:   liveins: $d0
@@ -13,3 +13,24 @@
   %res = bitcast <1 x i8> %a0 to i8
   ret i8 %res
 }
+
+define i24 @test_v3i8(<3 x i8> %a) {
+  ; CHECK-LABEL: name: test_v3i8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $w0, $w1, $w2
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $w2
+  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32)
+  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY3]](s32), [[COPY5]](s32)
+  ; CHECK:   [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
+  ; CHECK:   [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>)
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s24)
+  ; CHECK:   $w0 = COPY [[ANYEXT]](s32)
+  ; CHECK:   RET_ReallyLR implicit $w0
+  %res = bitcast <3 x i8> %a to i24
+  ret i24 %res
+}
+
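Note on the functional change (this note and the sketch below are not part of the patch): each part of a split argument now gets its own ArgReg, and a G_TRUNC is emitted only when the CC-assigned location type is strictly wider per scalar element than that register's type; when the sizes match, a plain COPY is built instead, which is what the new <3 x i8> test exercises (both sides are s32 there). G_TRUNC is only valid when the destination is strictly narrower than the source, so the extra guard avoids building invalid MIR. A minimal standalone C++ sketch of that selection rule, using hypothetical stand-ins (ScalarTy, pickAssignOp) in place of LLT/CCValAssign:

// Standalone illustration only -- not LLVM API.
#include <cassert>
#include <cstdio>

struct ScalarTy {
  unsigned Bits; // bits per scalar element, e.g. s32 -> 32, element of <3 x s8> -> 8
};

enum class AssignOp { Trunc, Copy };

// Truncate only when the CC-assigned location type is strictly wider than the
// virtual register holding the argument part; otherwise just copy.
AssignOp pickAssignOp(ScalarTy LocTy, ScalarTy ArgRegTy) {
  return LocTy.Bits > ArgRegTy.Bits ? AssignOp::Trunc : AssignOp::Copy;
}

int main() {
  // <3 x i8> on AArch64: each piece arrives in a w-register (s32) and the
  // per-part argument register is also s32, so the patched path emits a COPY.
  assert(pickAssignOp({32}, {32}) == AssignOp::Copy);
  // A genuinely wider location still truncates, as before the patch.
  assert(pickAssignOp({32}, {16}) == AssignOp::Trunc);
  std::puts("ok");
  return 0;
}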