diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1658,11 +1658,10 @@
   case TargetOpcode::G_BITCAST:
     // Imported SelectionDAG rules can handle every bitcast except those that
    // bitcast from a type to the same type. Ideally, these shouldn't occur
-    // but we might not run an optimizer that deletes them.
-    if (MRI.getType(I.getOperand(0).getReg()) ==
-        MRI.getType(I.getOperand(1).getReg()))
-      return selectCopy(I, TII, MRI, TRI, RBI);
-    return false;
+    // but we might not run an optimizer that deletes them. The other exception
+    // is bitcasts involving pointer types, as SelectionDAG has no knowledge
+    // of them.
+    return selectCopy(I, TII, MRI, TRI, RBI);
 
   case TargetOpcode::G_SELECT: {
     if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
--- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -34,6 +34,9 @@
 private:
   bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
                      MachineIRBuilder &MIRBuilder) const;
+  bool legalizeLoadStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &MIRBuilder,
+                         GISelChangeObserver &Observer) const;
 };
 } // End llvm namespace.
 #endif
diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -21,6 +21,8 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Type.h"
 
+#define DEBUG_TYPE "aarch64-legalinfo"
+
 using namespace llvm;
 using namespace LegalizeActions;
 using namespace LegalizeMutations;
@@ -208,13 +210,22 @@
       // Lower anything left over into G_*EXT and G_LOAD
       .lower();
 
+  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
+    const LLT &ValTy = Query.Types[0];
+    if (!ValTy.isVector())
+      return false;
+    const LLT EltTy = ValTy.getElementType();
+    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
+  };
+
   getActionDefinitionsBuilder(G_LOAD)
       .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                  {s16, p0, 16, 8},
                                  {s32, p0, 32, 8},
                                  {s64, p0, 64, 8},
                                  {p0, p0, 64, 8},
-                                 {v2s32, p0, 64, 8}})
+                                 {v2s32, p0, 64, 8},
+                                 {v2s64, p0, 128, 8}})
       // These extends are also legal
       .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                  {s32, p0, 16, 8}})
@@ -228,7 +239,8 @@
         return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
       })
       .clampMaxNumElements(0, s32, 2)
-      .clampMaxNumElements(0, s64, 1);
+      .clampMaxNumElements(0, s64, 1)
+      .customIf(IsPtrVecPred);
 
   getActionDefinitionsBuilder(G_STORE)
       .legalForTypesWithMemDesc({{s8, p0, 8, 8},
@@ -248,7 +260,8 @@
         Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
       })
       .clampMaxNumElements(0, s32, 2)
-      .clampMaxNumElements(0, s64, 1);
+      .clampMaxNumElements(0, s64, 1)
+      .customIf(IsPtrVecPred);
 
   // Constants
   getActionDefinitionsBuilder(G_CONSTANT)
@@ -357,7 +370,8 @@
       // number of bits but it's what the previous code described and fixing
       // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
-                                v8s16, v4s16, v2s16, v4s32, v2s32, v2s64});
+                                v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
+                                v2p0});
 
   getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
 
@@ -541,11 +555,53 @@
     return false;
   case TargetOpcode::G_VAARG:
     return legalizeVaArg(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_LOAD:
+  case TargetOpcode::G_STORE:
+    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
   }
 
   llvm_unreachable("expected switch to return");
 }
 
+bool AArch64LegalizerInfo::legalizeLoadStore(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
+    GISelChangeObserver &Observer) const {
+  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
+         MI.getOpcode() == TargetOpcode::G_LOAD);
+  // Here we just try to handle vector loads/stores where our value type might
+  // have pointer elements, which the SelectionDAG importer can't handle. To
+  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
+  // the value to use s64 types.
+
+  // Custom legalization requires that the instruction, if it is not deleted,
+  // be left fully legalized. To allow the result to be legalized further, we
+  // create a new instruction and erase the existing one.
+
+  unsigned ValReg = MI.getOperand(0).getReg();
+  const LLT ValTy = MRI.getType(ValReg);
+
+  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
+      ValTy.getElementType().getAddressSpace() != 0) {
+    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store\n");
+    return false;
+  }
+
+  MIRBuilder.setInstr(MI);
+  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
+  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
+  auto &MMO = **MI.memoperands_begin();
+  if (MI.getOpcode() == TargetOpcode::G_STORE) {
+    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
+    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
+  } else {
+    unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
+    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
+    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
+  }
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder) const {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -158,7 +158,7 @@
   br label %block
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %2:_(<2 x p0>), %1:_(p0) :: (store 16 into `<2 x i16*>* undef`) (in function: vector_of_pointers_insertelement)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(<2 x p0>) = G_INSERT_VECTOR_ELT %0:_, %3:_(p0), %5:_(s32) (in function: vector_of_pointers_insertelement)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement
 ; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement:
 define void @vector_of_pointers_insertelement() {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir
@@ -38,12 +38,8 @@
     ; CHECK: liveins: $x0, $x1
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY1]](p0) :: (store 16)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16)
+    ; CHECK: G_STORE [[LOAD]](<2 x s64>), [[COPY1]](p0) :: (store 16)
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr.mir
@@ -0,0 +1,86 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -march=aarch64 -run-pass=legalizer %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64"
+
+  define void @store_v2p0(<2 x i8*> %v, <2 x i8*>* %ptr) {
+    store <2 x i8*> %v, <2 x i8*>* %ptr
+    ret void
+  }
+
+  define <2 x i8*> @load_v2p0(<2 x i8*>* %ptr) {
+    %v = load <2 x i8*>, <2 x i8*>* %ptr
+    ret <2 x i8*> %v
+  }
+
+  define void @load_v2p1(<2 x i8 addrspace(1)*>* %ptr) { ret void }
+
+...
+---
+name: store_v2p0
+alignment: 2
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $q0, $x0
+
+    ; CHECK-LABEL: name: store_v2p0
+    ; CHECK: liveins: $q0, $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<2 x p0>)
+    ; CHECK: G_STORE [[BITCAST]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.ptr)
+    ; CHECK: RET_ReallyLR
+    %0:_(<2 x p0>) = COPY $q0
+    %1:_(p0) = COPY $x0
+    G_STORE %0(<2 x p0>), %1(p0) :: (store 16 into %ir.ptr)
+    RET_ReallyLR
+
+...
+---
+name: load_v2p0
+alignment: 2
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $x0
+
+    ; CHECK-LABEL: name: load_v2p0
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.ptr)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD]](<2 x s64>)
+    ; CHECK: $q0 = COPY [[BITCAST]](<2 x p0>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(p0) = COPY $x0
+    %1:_(<2 x p0>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr)
+    $q0 = COPY %1(<2 x p0>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: load_v2p1
+alignment: 2
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $x0
+
+    ; Check that we don't try to bitcast vectors of pointers with non-zero
+    ; address spaces.
+
+    ; CHECK-LABEL: name: load_v2p1
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.ptr)
+    ; CHECK: $q0 = COPY [[LOAD]](<2 x p1>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(p0) = COPY $x0
+    %1:_(<2 x p1>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr)
+    $q0 = COPY %1(<2 x p1>)
+    RET_ReallyLR implicit $q0
+
+...
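Why the bitcast trick in legalizeLoadStore above is sound: on AArch64, p0 and s64 are both 64 bits wide and sit in the same register banks, so <2 x p0> and <2 x s64> are layout-compatible and the existing s64 vector load/store patterns can serve the rewritten access. Below is a standalone sketch of the type computation the patch performs, assuming the LLT API as used in the hunks above; the helper name makeIntVecTy is hypothetical and not part of the patch.

#include "llvm/Support/LowLevelTypeImpl.h"

// Hypothetical helper illustrating the NewTy computation in legalizeLoadStore:
// replace a vector-of-pointers LLT with a vector of integers of the same
// width, e.g. <2 x p0> -> <2 x s64> on AArch64, where pointers are 64 bits.
static llvm::LLT makeIntVecTy(llvm::LLT ValTy) {
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  return llvm::LLT::vector(ValTy.getNumElements(), PtrSize);
}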
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-load-store-vector-of-ptr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-load-store-vector-of-ptr.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-load-store-vector-of-ptr.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -O0 -run-pass=instruction-select -verify-machineinstrs %s -global-isel-abort=1 -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64"
+
+  define void @store_v2p0(<2 x i8*> %v, <2 x i8*>* %ptr) {
+    store <2 x i8*> %v, <2 x i8*>* %ptr
+    ret void
+  }
+
+  define <2 x i8*> @load_v2p0(<2 x i8*>* %ptr) {
+    %v = load <2 x i8*>, <2 x i8*>* %ptr
+    ret <2 x i8*> %v
+  }
+
+...
+---
+name: store_v2p0
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: fpr }
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $q0, $x0
+
+    ; CHECK-LABEL: name: store_v2p0
+    ; CHECK: liveins: $q0, $x0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: STRQui [[COPY]], [[COPY1]], 0 :: (store 16 into %ir.ptr)
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<2 x p0>) = COPY $q0
+    %1:gpr(p0) = COPY $x0
+    %2:fpr(<2 x s64>) = G_BITCAST %0(<2 x p0>)
+    G_STORE %2(<2 x s64>), %1(p0) :: (store 16 into %ir.ptr)
+    RET_ReallyLR
+
+...
+---
+name: load_v2p0
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $x0
+
+    ; CHECK-LABEL: name: load_v2p0
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16 from %ir.ptr)
+    ; CHECK: $q0 = COPY [[LDRQui]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:gpr(p0) = COPY $x0
+    %2:fpr(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr)
+    %1:fpr(<2 x p0>) = G_BITCAST %2(<2 x s64>)
+    $q0 = COPY %1(<2 x p0>)
+    RET_ReallyLR implicit $q0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
@@ -32,6 +32,7 @@
   define void @load_gep_32_s8_fpr(i8* %addr) { ret void }
 
   define void @load_v2s32(i64 *%addr) { ret void }
+  define void @load_v2s64(i64 *%addr) { ret void }
 
 ...
 ---
@@ -112,8 +113,8 @@
     ; CHECK-LABEL: name: load_s16_gpr
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr)
-    ; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
-    ; CHECK: $w0 = COPY [[T0]]
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
+    ; CHECK: $w0 = COPY [[COPY1]]
     %0(p0) = COPY $x0
     %1(s16) = G_LOAD %0 :: (load 2 from %ir.addr)
     %2:gpr(s32) = G_ANYEXT %1
@@ -129,7 +130,7 @@
   bb.0:
     liveins: $x0
 
-    ; CHECK-LABEL: name: load_s8_gpr
+    ; CHECK-LABEL: name: load_s8_gpr_anyext
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.addr)
     ; CHECK: $w0 = COPY [[LDRBBui]]
@@ -154,8 +155,8 @@
     ; CHECK-LABEL: name: load_s8_gpr
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.addr)
-    ; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
-    ; CHECK: $w0 = COPY [[T0]]
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
+    ; CHECK: $w0 = COPY [[COPY1]]
     %0(p0) = COPY $x0
     %1(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
     %2:gpr(s32) = G_ANYEXT %1
@@ -256,8 +257,8 @@
     ; CHECK-LABEL: name: load_gep_64_s16_gpr
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 32 :: (load 2 from %ir.addr)
-    ; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
-    ; CHECK: $w0 = COPY [[T0]]
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
+    ; CHECK: $w0 = COPY [[COPY1]]
     %0(p0) = COPY $x0
     %1(s64) = G_CONSTANT i64 64
     %2(p0) = G_GEP %0, %1
@@ -284,8 +285,8 @@
     ; CHECK-LABEL: name: load_gep_1_s8_gpr
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 1 :: (load 1 from %ir.addr)
-    ; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
-    ; CHECK: $w0 = COPY [[T0]]
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
+    ; CHECK: $w0 = COPY [[COPY1]]
     %0(p0) = COPY $x0
     %1(s64) = G_CONSTANT i64 1
     %2(p0) = G_GEP %0, %1
@@ -506,3 +507,24 @@
     %1(<2 x s32>) = G_LOAD %0 :: (load 8 from %ir.addr)
     $d0 = COPY %1(<2 x s32>)
 ...
+---
+name: load_v2s64
+legalized: true
+regBankSelected: true
+
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: load_v2s64
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16 from %ir.addr)
+    ; CHECK: $q0 = COPY [[LDRQui]]
+    %0(p0) = COPY $x0
+    %1(<2 x s64>) = G_LOAD %0 :: (load 16 from %ir.addr)
+    $q0 = COPY %1(<2 x s64>)
+...
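For readers less familiar with GlobalISel's custom-legalization flow, the wiring this patch relies on is: .customIf(IsPtrVecPred) marks matching G_LOAD/G_STORE instructions as Custom, and the legalizer then calls back into AArch64LegalizerInfo::legalizeCustom, whose switch (extended in the AArch64LegalizerInfo.cpp hunk above) dispatches to legalizeLoadStore. A condensed sketch of that dispatch, restated from the hunks above rather than copied verbatim:

// Condensed sketch of the legalizeCustom dispatch extended by this patch;
// the real switch is shown in the AArch64LegalizerInfo.cpp hunk above.
bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    return false; // not an opcode we marked as Custom
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    // New in this patch: bitcast vector-of-pointer values to integer vectors
    // so the existing s64 load/store patterns apply.
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  }
}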