Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1072,6 +1072,8 @@ // FIXME: Don't know how to handle secondary types yet. if (TypeIdx != 0) return UnableToLegalize; + + MIRBuilder.setInstr(MI); switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -1085,8 +1087,6 @@ if (Size % NarrowSize != 0) return UnableToLegalize; - MIRBuilder.setInstr(MI); - SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); @@ -1101,6 +1101,48 @@ MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_LOAD: + case TargetOpcode::G_STORE: { + bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; + unsigned ValReg = MI.getOperand(0).getReg(); + unsigned AddrReg = MI.getOperand(1).getReg(); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + unsigned Size = MRI.getType(ValReg).getSizeInBits(); + unsigned NumParts = Size / NarrowSize; + // Size / NarrowSize truncates, so a value that is not a whole multiple of + // NarrowTy would lose its tail. Reject it, as the case above does. + if (Size % NarrowSize != 0) + return UnableToLegalize; + + SmallVector<unsigned, 8> NarrowRegs; + if (!IsLoad) + extractParts(ValReg, NarrowTy, NumParts, NarrowRegs); + + const LLT OffsetTy = + LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); + MachineFunction &MF = *MI.getMF(); + MachineMemOperand *MMO = *MI.memoperands_begin(); + for (unsigned Idx = 0; Idx < NumParts; ++Idx) { + unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8; + unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment); + unsigned NewAddrReg = 0; + MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment); + MachineMemOperand &NewMMO = *MF.getMachineMemOperand( + MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(), + NarrowTy.getSizeInBits() / 8, Alignment); + if (IsLoad) { + unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy); + NarrowRegs.push_back(Dst); + MIRBuilder.buildLoad(Dst, NewAddrReg, 
NewMMO); + } else { + MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO); + } + } + if (IsLoad) + MIRBuilder.buildMerge(ValReg, NarrowRegs); + MI.eraseFromParent(); + return Legalized; + } } } Index: test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -34,6 +34,14 @@ ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD6]](s64), [[LOAD7]](s64) ; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[MV]](s128) ; CHECK: $x0 = COPY [[TRUNC]](s64) + ; CHECK: [[LOAD8:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 16) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD9:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[GEP1]](p0) :: (load 8) + ; CHECK: [[MV1:%[0-9]+]]:_(<4 x s32>) = G_MERGE_VALUES [[LOAD8]](<2 x s32>), [[LOAD9]](<2 x s32>) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[MV1]](<4 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[BITCAST1]](s128) + ; CHECK: $x0 = COPY [[TRUNC1]](s64) %0:_(p0) = COPY $x0 %1:_(s1) = G_LOAD %0(p0) :: (load 1) %2:_(s32) = G_ANYEXT %1(s1) @@ -57,6 +65,10 @@ %13:_(s128) = G_LOAD %0(p0) :: (load 16) %14:_(s64) = G_TRUNC %13(s128) $x0 = COPY %14(s64) + %15:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16) + %16:_(s128) = G_BITCAST %15(<4 x s32>) + %17:_(s64) = G_TRUNC %16(s128) + $x0 = COPY %17(s64) ... 
--- @@ -83,10 +95,17 @@ ; CHECK: G_STORE [[PTRTOINT]](s64), [[COPY]](p0) :: (store 8) ; CHECK: G_STORE [[COPY]](p0), [[COPY]](p0) :: (store 8) ; CHECK: [[PTRTOINT1:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0) + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[PTRTOINT1]](s64), [[PTRTOINT1]](s64) ; CHECK: G_STORE [[PTRTOINT1]](s64), [[COPY]](p0) :: (store 8, align 16) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CHECK: G_STORE [[PTRTOINT1]](s64), [[GEP]](p0) :: (store 8) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[MV]](s128) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) + ; CHECK: G_STORE [[UV]](<2 x s32>), [[COPY]](p0) :: (store 8, align 16) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[UV1]](<2 x s32>), [[GEP1]](p0) :: (store 8) %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %2:_(s1) = G_TRUNC %1(s32) @@ -102,4 +121,6 @@ %6:_(s64) = G_PTRTOINT %0(p0) %7:_(s128) = G_MERGE_VALUES %6(s64), %6 G_STORE %7(s128), %0(p0) :: (store 16) + %8:_(<4 x s32>) = G_BITCAST %7(s128) + G_STORE %8(<4 x s32>), %0(p0) :: (store 16) ... Index: test/CodeGen/AArch64/GlobalISel/legalize-load-v4s32.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/legalize-load-v4s32.mir +++ test/CodeGen/AArch64/GlobalISel/legalize-load-v4s32.mir @@ -1,10 +1,9 @@ -# RUN: not llc -march=aarch64 -o - -run-pass=legalizer -debug-only=legalizer 2>&1 %s | FileCheck %s +# RUN: llc -march=aarch64 -o - -run-pass=legalizer -debug-only=legalizer 2>&1 %s | FileCheck %s # REQUIRES: asserts # CHECK: Legalize Machine IR for: load_v4s32 # CHECK-NEXT: %{{[0-9]+}}:_(<4 x s32>) = G_LOAD %{{[0-9]+}}:_(p0) -# CHECK-NOT: Lower -# CHECK: unable to legalize instruction +# CHECK-NEXT: Reduce number of elements --- name: load_v4s32 legalized: false