diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5100,37 +5100,43 @@
   if (TypeIdx != 0)
     return UnableToLegalize;
 
-  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
-  uint64_t NarrowSize = NarrowTy.getSizeInBits();
-
-  // FIXME: add support for when SizeOp0 isn't an exact multiple of
-  // NarrowSize.
-  if (SizeOp0 % NarrowSize != 0)
-    return UnableToLegalize;
-
-  int NumParts = SizeOp0 / NarrowSize;
-
-  SmallVector<Register, 2> SrcRegs, DstRegs;
+  SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
   SmallVector<uint64_t, 2> Indexes;
-  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
+  LLT LeftoverTy;
+  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
+               LeftoverRegs);
+  for (Register Reg : LeftoverRegs)
+    SrcRegs.push_back(Reg);
+
+  uint64_t NarrowSize = NarrowTy.getSizeInBits();
   Register OpReg = MI.getOperand(2).getReg();
   uint64_t OpStart = MI.getOperand(3).getImm();
   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
-  for (int i = 0; i < NumParts; ++i) {
-    unsigned DstStart = i * NarrowSize;
+  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
+    unsigned DstStart = I * NarrowSize;
 
-    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
-      // No part of the insert affects this subregister, forward the original.
-      DstRegs.push_back(SrcRegs[i]);
-      continue;
-    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
       // The entire subregister is defined by this insert, forward the new
       // value.
       DstRegs.push_back(OpReg);
       continue;
     }
 
+    Register SrcReg = SrcRegs[I];
+    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
+      // The leftover reg is smaller than NarrowTy, so we need to extend it.
+      SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
+      MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
+    }
+
+    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+      // No part of the insert affects this subregister, forward the original.
+      DstRegs.push_back(SrcReg);
+      continue;
+    }
+
     // OpSegStart is where this destination segment would start in OpReg if it
     // extended infinitely in both directions.
     int64_t ExtractOffset, InsertOffset;
@@ -5154,16 +5160,19 @@
     }
 
     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
-    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
+    MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
     DstRegs.push_back(DstReg);
   }
 
-  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+  uint64_t WideSize = DstRegs.size() * NarrowSize;
   Register DstReg = MI.getOperand(0).getReg();
-  if(MRI.getType(DstReg).isVector())
-    MIRBuilder.buildBuildVector(DstReg, DstRegs);
-  else
+  if (WideSize > RegTy.getSizeInBits()) {
+    Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
+    MIRBuilder.buildMerge(MergeReg, DstRegs);
+    MIRBuilder.buildTrunc(DstReg, MergeReg);
+  } else
     MIRBuilder.buildMerge(DstReg, DstRegs);
+
   MI.eraseFromParent();
   return Legalized;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -91,28 +91,6 @@
   ret void
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s96) = G_INSERT %{{[0-9]+}}:_, %{{[0-9]+}}:_(s32), 64 (in function: nonpow2_or_narrowing)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_or_narrowing
-; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_or_narrowing:
-define void @nonpow2_or_narrowing() {
-  %a = add i128 undef, undef
-  %b = trunc i128 %a to i96
-  %a2 = add i128 undef, undef
-  %b2 = trunc i128 %a2 to i96
-  %dummy = or i96 %b, %b2
-  store i96 %dummy, i96* undef
-  ret void
-}
-
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(s96) = G_INSERT %10:_, %8:_(s32), 64 (in function: nonpow2_load_narrowing)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_load_narrowing
-; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_load_narrowing:
-define void @nonpow2_load_narrowing() {
-  %dummy = load i96, i96* undef
-  store i96 %dummy, i96* undef
-  ret void
-}
-
 ; Currently can't handle vector lengths that aren't an exact multiple of
 ; natively supported vector lengths. Test that the fall-back works for those.
 ; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
@@ -1,11 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s
-
---- |
-  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-  target triple = "aarch64--"
-  define void @test_inserts_nonpow2() { ret void }
-...
+# RUN: llc -O0 -mtriple=aarch64-- -run-pass=legalizer %s -o - | FileCheck %s
 ---
 name: test_inserts_nonpow2
 body: |
@@ -15,8 +9,12 @@
     ; CHECK-LABEL: name: test_inserts_nonpow2
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = COPY $x3
-    ; CHECK: $x0 = COPY [[C]]
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK: $x0 = COPY [[COPY3]](s64)
+    ; CHECK: RET_ReallyLR
     %0:_(s64) = COPY $x0
     %1:_(s64) = COPY $x1
     %2:_(s64) = COPY $x2
@@ -27,3 +25,61 @@
     $x0 = COPY %6
     RET_ReallyLR
 ...
+---
+name: test_inserts_s96
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: test_inserts_s96
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s32)
+    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT]], [[TRUNC]](s32), 0
+    ; CHECK: $x0 = COPY [[COPY3]](s64)
+    ; CHECK: $x1 = COPY [[INSERT]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s128) = G_MERGE_VALUES %0:_(s64), %1:_(s64)
+    %4:_(s96) = G_TRUNC %3(s128)
+    %5:_(s32) = G_TRUNC %2(s64)
+    %6:_(s96) = G_INSERT %4, %5(s32), 64
+    %7:_(s128) = G_ANYEXT %6(s96)
+    %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %7
+    $x0 = COPY %8
+    $x1 = COPY %9
+...
+---
+name: test_inserts_s65
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: test_inserts_s65
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[COPY1]](s64), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s1)
+    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT]], [[TRUNC]](s1), 0
+    ; CHECK: $x0 = COPY [[COPY3]](s64)
+    ; CHECK: $x1 = COPY [[INSERT]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s128) = G_MERGE_VALUES %0:_(s64), %1:_(s64)
+    %4:_(s65) = G_TRUNC %3(s128)
+    %5:_(s1) = G_TRUNC %2(s64)
+    %6:_(s65) = G_INSERT %4, %5(s1), 64
+    %7:_(s128) = G_ANYEXT %6(s65)
+    %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %7
+    $x0 = COPY %8
+    $x1 = COPY %9
+...
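
For reference, the following is a minimal standalone sketch (not LLVM code; `Segment`, `computeSegment`, and the values in `main` are invented for illustration) of the per-part arithmetic this patch relies on: the wide G_INSERT destination is split into NarrowTy-sized pieces plus an any-extended leftover piece, and each piece is either forwarded unchanged, replaced by the inserted value, or given a smaller insert at a computed offset. The numbers in `main` mirror the new test_inserts_s96 case (s96 destination, s64 narrow type, s32 inserted at bit 64).

// per_part_insert_sketch.cpp -- standalone illustration only, not LLVM code.
#include <algorithm>
#include <cstdint>
#include <iostream>

struct Segment {
  bool Overlaps = false;      // does the inserted value touch this part?
  uint64_t ExtractOffset = 0; // offset into the inserted value
  uint64_t InsertOffset = 0;  // offset into this destination part
  uint64_t SegSize = 0;       // number of bits copied into this part
};

// Decide what happens to the part covering bits [DstStart, DstStart + NarrowSize)
// when OpSize bits are inserted at bit OpStart of the wide value.
static Segment computeSegment(uint64_t DstStart, uint64_t NarrowSize,
                              uint64_t OpStart, uint64_t OpSize) {
  Segment S;
  if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize)
    return S; // no overlap: the part is forwarded unchanged
  S.Overlaps = true;
  if (OpStart < DstStart) {
    // The insert started in an earlier part; copy its tail into this one.
    S.InsertOffset = 0;
    S.ExtractOffset = DstStart - OpStart;
    S.SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
  } else {
    // The insert starts inside this part.
    S.InsertOffset = OpStart - DstStart;
    S.ExtractOffset = 0;
    S.SegSize = std::min(NarrowSize - S.InsertOffset, OpSize);
  }
  return S;
}

int main() {
  // s96 destination narrowed to s64: part 0 is a full s64, part 1 is the s32
  // leftover, any-extended to s64 before the per-part insert.
  const uint64_t NarrowSize = 64, OpStart = 64, OpSize = 32;
  const uint64_t NumParts = 2;
  for (uint64_t I = 0; I != NumParts; ++I) {
    Segment S = computeSegment(I * NarrowSize, NarrowSize, OpStart, OpSize);
    if (!S.Overlaps) {
      std::cout << "part " << I << ": forwarded unchanged\n";
      continue;
    }
    std::cout << "part " << I << ": insert " << S.SegSize
              << " bits at part offset " << S.InsertOffset
              << " (extract offset " << S.ExtractOffset << ")\n";
  }
  // The remerged result is 2 x 64 = 128 bits, wider than the original 96, which
  // is why the patched code merges into an s128 and truncates back to s96.
  return 0;
}

Running the sketch prints that part 0 passes through untouched and part 1 receives a 32-bit insert at offset 0, matching the G_INSERT at offset 0 into the any-extended leftover in the test_inserts_s96 CHECK lines above.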