diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4885,17 +4885,6 @@
   if (DstType.isVector())
     return UnableToLegalize;
 
-  uint64_t SizeOp0 = DstType.getSizeInBits();
-  uint64_t NarrowSize = NarrowTy.getSizeInBits();
-
-  // FIXME: add support for when SizeOp0 isn't an exact multiple of
-  // NarrowSize.
-  if (SizeOp0 % NarrowSize != 0)
-    return UnableToLegalize;
-
-  // Expand in terms of carry-setting/consuming G_<Op>E instructions.
-  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
-
   unsigned Opcode = MI.getOpcode();
   unsigned OpO, OpE, OpF;
   switch (Opcode) {
@@ -4929,28 +4918,37 @@
   unsigned NumDefs = MI.getNumExplicitDefs();
   Register Src1 = MI.getOperand(NumDefs).getReg();
   Register Src2 = MI.getOperand(NumDefs + 1).getReg();
-  Register CarryDst;
+  Register CarryDst, CarryIn;
   if (NumDefs == 2)
     CarryDst = MI.getOperand(1).getReg();
-  Register CarryIn;
   if (MI.getNumOperands() == NumDefs + 3)
     CarryIn = MI.getOperand(NumDefs + 2).getReg();
 
-  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
-  extractParts(Src1, NarrowTy, NumParts, Src1Regs);
-  extractParts(Src2, NarrowTy, NumParts, Src2Regs);
+  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
+  LLT LeftoverTy, DummyTy;
+  SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
+  extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
+  extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
 
-  for (int i = 0; i < NumParts; ++i) {
-    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+  int NarrowParts = Src1Regs.size();
+  for (int I = 0, E = Src1Left.size(); I != E; ++I) {
+    Src1Regs.push_back(Src1Left[I]);
+    Src2Regs.push_back(Src2Left[I]);
+  }
+  DstRegs.reserve(Src1Regs.size());
+
+  for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
+    Register DstReg =
+        MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
     Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
     // Forward the final carry-out to the destination register
-    if (i == NumParts - 1 && CarryDst)
+    if (i == e - 1 && CarryDst)
      CarryOut = CarryDst;
 
     if (!CarryIn) {
       MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
                             {Src1Regs[i], Src2Regs[i]});
-    } else if (i == NumParts - 1) {
+    } else if (i == e - 1) {
       MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
                             {Src1Regs[i], Src2Regs[i], CarryIn});
     } else {
@@ -4961,7 +4959,10 @@
     DstRegs.push_back(DstReg);
     CarryIn = CarryOut;
   }
-  MIRBuilder.buildMerge(DstReg, DstRegs);
+  insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
+              makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
+              makeArrayRef(DstRegs).drop_front(NarrowParts));
+
   MI.eraseFromParent();
   return Legalized;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -80,7 +80,7 @@
   br label %block
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s96) = G_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_ (in function: nonpow2_add_narrowing)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %{{[0-9]+}}:gpr32(s32), %{{[0-9]+}}:gpr(s1) = G_UADDE %{{[0-9]+}}:gpr, %{{[0-9]+}}:gpr, %{{[0-9]+}}:gpr (in function: nonpow2_add_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing:
 define void @nonpow2_add_narrowing(i128 %x, i128 %y) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
@@ -73,6 +73,89 @@
     %5:_(s64) = G_ANYEXT %4(s8)
     $x0 = COPY %5(s64)
 
+...
+---
+name: test_scalar_add_narrowing
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: test_scalar_add_narrowing
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY [[COPY2]](s64)
+    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](s64), 0
+    ; CHECK: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY4]], [[COPY5]]
+    ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[EXTRACT]], [[EXTRACT1]], [[UADDO1]]
+    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
+    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT2]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY [[UADDO]](s64)
+    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[ANYEXT]](s64), 0
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT3]](s32)
+    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT1]], [[UADDE]](s32), 0
+    ; CHECK: $x0 = COPY [[COPY6]](s64)
+    ; CHECK: $x1 = COPY [[INSERT]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %4:_(s128) = G_MERGE_VALUES %0(s64), %1(s64)
+    %5:_(s128) = G_MERGE_VALUES %2(s64), %3(s64)
+    %6:_(s96) = G_TRUNC %4(s128)
+    %7:_(s96) = G_TRUNC %5(s128)
+    %8:_(s96) = G_ADD %6, %7
+    %9:_(s128) = G_ANYEXT %8(s96)
+    %10:_(s64), %11:_(s64) = G_UNMERGE_VALUES %9(s128)
+    $x0 = COPY %10(s64)
+    $x1 = COPY %11(s64)
+
+...
+---
+name: test_scalar_add_narrowing_s65
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: test_scalar_add_narrowing_s65
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[COPY1]](s64), 0
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY [[COPY2]](s64)
+    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s1) = G_EXTRACT [[COPY3]](s64), 0
+    ; CHECK: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY4]], [[COPY5]]
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT]](s1)
+    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT1]](s1)
+    ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[ZEXT]], [[ZEXT1]], [[UADDO1]]
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UADDE]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s1) = G_EXTRACT [[DEF]](s64), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT2]](s1)
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY [[UADDO]](s64)
+    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s1) = G_EXTRACT [[ANYEXT]](s64), 0
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT3]](s1)
+    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT1]], [[COPY6]](s1), 0
+    ; CHECK: $x0 = COPY [[COPY7]](s64)
+    ; CHECK: $x1 = COPY [[INSERT]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %4:_(s128) = G_MERGE_VALUES %0(s64), %1(s64)
+    %5:_(s128) = G_MERGE_VALUES %2(s64), %3(s64)
+    %6:_(s65) = G_TRUNC %4(s128)
+    %7:_(s65) = G_TRUNC %5(s128)
+    %8:_(s65) = G_ADD %6, %7
+    %9:_(s128) = G_ANYEXT %8(s65)
+    %10:_(s64), %11:_(s64) = G_UNMERGE_VALUES %9(s128)
+    $x0 = COPY %10(s64)
+    $x1 = COPY %11(s64)
+
 ...
 ---
 name: test_vector_add
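
For context on what the new expansion produces: each narrow part is added with a
carry-setting op (G_UADDO for the lowest part, G_UADDE for the rest), the s1 carry
is threaded from part to part, and the leftover part simply joins the end of the
chain at its own width. Below is a minimal standalone sketch of the same carry
chain over 64-bit limbs; this is plain C++ for illustration, not the LLVM API, and
the function name and limb representation are assumptions, not part of the patch.

#include <cstdint>
#include <vector>

// Adds two wide integers stored as little-endian 64-bit limbs, mirroring the
// G_UADDO / G_UADDE chain narrowScalarAddSub emits. A "leftover" part of a
// narrower width behaves the same way, just at a different bit width.
std::vector<uint64_t> wideAdd(const std::vector<uint64_t> &A,
                              const std::vector<uint64_t> &B) {
  std::vector<uint64_t> Sum(A.size());
  bool Carry = false;                    // models the s1 carry register
  for (size_t I = 0; I != A.size(); ++I) {
    uint64_t Part = A[I] + B[I];         // G_UADDO (I == 0) / G_UADDE
    bool CarryOut = Part < A[I];         // carry-out of the raw add
    Part += Carry;                       // consume the incoming carry
    CarryOut |= Part < static_cast<uint64_t>(Carry);
    Sum[I] = Part;
    Carry = CarryOut;                    // becomes the next part's carry-in
  }
  // For G_UADDO/G_UADDE roots, the final Carry is what gets forwarded to
  // CarryDst on the last part.
  return Sum;
}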