Index: llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1128,6 +1128,8 @@ // FIXME: Don't know how to handle secondary types yet. if (TypeIdx != 0) return UnableToLegalize; + + MIRBuilder.setInstr(MI); switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -1141,8 +1143,6 @@ if (Size % NarrowSize != 0) return UnableToLegalize; - MIRBuilder.setInstr(MI); - SmallVector Src1Regs, Src2Regs, DstRegs; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); @@ -1157,6 +1157,48 @@ MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_LOAD: + case TargetOpcode::G_STORE: { + bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; + unsigned ValReg = MI.getOperand(0).getReg(); + unsigned AddrReg = MI.getOperand(1).getReg(); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + unsigned Size = MRI.getType(ValReg).getSizeInBits(); + unsigned NumParts = Size / NarrowSize; + + SmallVector NarrowRegs; + if (!IsLoad) + extractParts(ValReg, NarrowTy, NumParts, NarrowRegs); + + const LLT OffsetTy = + LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); + MachineFunction &MF = *MI.getMF(); + MachineMemOperand *MMO = *MI.memoperands_begin(); + for (unsigned Idx = 0; Idx < NumParts; ++Idx) { + unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8; + unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment); + unsigned NewAddrReg = 0; + MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment); + MachineMemOperand &NewMMO = *MF.getMachineMemOperand( + MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(), + NarrowTy.getSizeInBits() / 8, Alignment); + if (IsLoad) { + unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy); + NarrowRegs.push_back(Dst); + MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO); + } else { + MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO); + } + } + if (IsLoad) { + if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(ValReg, NarrowRegs); + else + MIRBuilder.buildBuildVector(ValReg, NarrowRegs); + } + MI.eraseFromParent(); + return Legalized; + } } } Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-fewerElts.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-fewerElts.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-fewerElts.mir @@ -1,39 +0,0 @@ -# RUN: llc -march=aarch64 -o - -run-pass=legalizer -global-isel-abort=0 -debug-only=legalizer 2>&1 %s | FileCheck %s -# REQUIRES: asserts - -# CHECK: Legalize Machine IR for: load_v4s32 -# CHECK-NEXT: %{{[0-9]+}}:_(<4 x s32>) = G_LOAD %{{[0-9]+}}:_(p0) -# CHECK-NEXT: Reduce number of elements ---- -name: load_v4s32 -legalized: false -tracksRegLiveness: true -body: | - bb.1: - liveins: $x0 - - %0:_(p0) = COPY $x0 - %1:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16, align 4) - %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %1(<4 x s32>) - $w0 = COPY %5(s32) - -... - -# Make sure we are able to scalarize v2s64. -# CHECK: Legalize Machine IR for: load_v2s64 -# CHECK-NEXT: %{{[0-9]+}}:_(<2 x s64>) = G_LOAD %{{[0-9]+}}:_(p0) -# CHECK-NEXT: Reduce number of elements ---- -name: load_v2s64 -legalized: false -tracksRegLiveness: true -body: | - bb.1: - liveins: $x0 - - %0:_(p0) = COPY $x0 - %1:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16) - %2:_(s64), %3:_(s64) = G_UNMERGE_VALUES %1(<2 x s64>) - $x0 = COPY %3(s64) - -... Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir @@ -0,0 +1,54 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=aarch64 -o - -run-pass=legalizer %s | FileCheck %s +--- +name: load_v4s32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: load_v4s32 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 16) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[GEP]](p0) :: (load 8) + ; CHECK: G_STORE [[LOAD]](<2 x s32>), [[COPY1]](p0) :: (store 8, align 16) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD1]](<2 x s32>), [[GEP1]](p0) :: (store 8) + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16) + G_STORE %2(<4 x s32>), %1(p0) :: (store 16) + +... +--- +name: load_v2s64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: load_v2s64 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8) + ; CHECK: G_STORE [[LOAD]](s64), [[COPY1]](p0) :: (store 8, align 16) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD1]](s64), [[GEP1]](p0) :: (store 8) + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16) + G_STORE %2(<2 x s64>), %1(p0) :: (store 16) + +...