Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -3004,6 +3004,22 @@ def ro_Xindexed64 : ComplexPattern<i64, 4, "SelectAddrModeXRO<64>", []>; def ro_Xindexed128 : ComplexPattern<i64, 4, "SelectAddrModeXRO<128>", []>; +def gi_ro_Xindexed8 : + GIComplexOperandMatcher<s64, "selectAddrModeXRO<8>">, + GIComplexPatternEquiv<ro_Xindexed8>; +def gi_ro_Xindexed16 : + GIComplexOperandMatcher<s64, "selectAddrModeXRO<16>">, + GIComplexPatternEquiv<ro_Xindexed16>; +def gi_ro_Xindexed32 : + GIComplexOperandMatcher<s64, "selectAddrModeXRO<32>">, + GIComplexPatternEquiv<ro_Xindexed32>; +def gi_ro_Xindexed64 : + GIComplexOperandMatcher<s64, "selectAddrModeXRO<64>">, + GIComplexPatternEquiv<ro_Xindexed64>; +def gi_ro_Xindexed128 : + GIComplexOperandMatcher<s64, "selectAddrModeXRO<128>">, + GIComplexPatternEquiv<ro_Xindexed128>; + def ro_Windexed8 : ComplexPattern<i64, 4, "SelectAddrModeWRO<8>", []>; def ro_Windexed16 : ComplexPattern<i64, 4, "SelectAddrModeWRO<16>", []>; def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>; Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -76,7 +76,6 @@ bool earlySelect(MachineInstr &I) const; bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; - bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const; /// Eliminate same-sized cross-bank copies into stores before selectImpl(). 
void contractCrossBankCopyIntoStore(MachineInstr &I, @@ -208,6 +207,10 @@ ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, unsigned SizeInBytes) const; + template <int Width> + ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { + return selectAddrModeXRO(Root, Width / 8); + } ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const; @@ -1246,57 +1249,6 @@ I.getOperand(0).setReg(DefDstReg); } -bool AArch64InstructionSelector::earlySelectLoad( - MachineInstr &I, MachineRegisterInfo &MRI) const { - // Try to fold in shifts, etc into the addressing mode of a load. - assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op"); - - // Don't handle atomic loads/stores yet. - auto &MemOp = **I.memoperands_begin(); - if (MemOp.isAtomic()) { - LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n"); - return false; - } - - unsigned MemBytes = MemOp.getSize(); - - // Only support 64-bit loads for now. - if (MemBytes != 8) - return false; - - Register DstReg = I.getOperand(0).getReg(); - const LLT DstTy = MRI.getType(DstReg); - // Don't handle vectors. - if (DstTy.isVector()) - return false; - - unsigned DstSize = DstTy.getSizeInBits(); - // TODO: 32-bit destinations. - if (DstSize != 64) - return false; - - // Check if we can do any folding from GEPs/shifts etc. into the load. - auto ImmFn = selectAddrModeXRO(I.getOperand(1), MemBytes); - if (!ImmFn) - return false; - - // We can fold something. Emit the load here. - MachineIRBuilder MIB(I); - - // Choose the instruction based off the size of the element being loaded, and - // whether or not we're loading into a FPR. - const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI); - unsigned Opc = - RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX; - // Construct the load. 
- auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {}); - for (auto &RenderFn : *ImmFn) - RenderFn(LoadMI); - LoadMI.addMemOperand(*I.memoperands_begin()); - I.eraseFromParent(); - return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI); -} - bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -1308,8 +1260,6 @@ switch (I.getOpcode()) { case TargetOpcode::G_SHL: return earlySelectSHL(I, MRI); - case TargetOpcode::G_LOAD: - return earlySelectLoad(I, MRI); case TargetOpcode::G_CONSTANT: { bool IsZero = false; if (I.getOperand(1).isCImm()) @@ -4342,12 +4292,16 @@ // We can use the LHS of the GEP as the base, and the LHS of the shift as an // offset. Signify that we are shifting by setting the shift flag to 1. - return {{ - [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); }, - [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(1); }, - }}; + return {{[=](MachineInstrBuilder &MIB) { + MIB.addUse(Gep->getOperand(1).getReg()); + }, + [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, + [=](MachineInstrBuilder &MIB) { + // Need to add both immediates here to make sure that they are both + // added to the instruction. + MIB.addImm(0); + MIB.addImm(1); + }}}; } /// This is used for computing addresses like this: @@ -4375,12 +4329,18 @@ return None; // Base is the GEP's LHS, offset is its RHS. 
- return {{ - [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); }, - [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, - }}; + return {{[=](MachineInstrBuilder &MIB) { + MIB.addUse(Gep->getOperand(1).getReg()); + }, + [=](MachineInstrBuilder &MIB) { + MIB.addUse(Gep->getOperand(2).getReg()); + }, + [=](MachineInstrBuilder &MIB) { + // Need to add both immediates here to make sure that they are both + // added to the instruction. + MIB.addImm(0); + MIB.addImm(0); + }}}; } /// This is intended to be equivalent to selectAddrModeXRO in Index: llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir @@ -18,6 +18,11 @@ define void @more_than_one_use_shl_lsl_fast(i64* %addr) #1 { ret void } define void @more_than_one_use_shl_lsl_slow(i64* %addr) { ret void } define void @more_than_one_use_shl_minsize(i64* %addr) #0 { ret void } + define void @ldrwrox(i64* %addr) { ret void } + define void @ldrsrox(i64* %addr) { ret void } + define void @ldrhrox(i64* %addr) { ret void } + define void @ldbbrox(i64* %addr) { ret void } + define void @ldrqrox(i64* %addr) { ret void } attributes #0 = { optsize minsize } attributes #1 = { "target-features"="+lsl-fast" } ... @@ -72,7 +77,6 @@ $d0 = COPY %4(s64) RET_ReallyLR implicit $d0 ... - --- name: more_than_one_use alignment: 2 @@ -506,3 +510,123 @@ %9:gpr(s64) = G_ADD %8, %7 $x2 = COPY %9(s64) RET_ReallyLR implicit $x2 +... 
+--- +name: ldrwrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: ldrwrox + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], [[COPY1]], 0, 0 :: (load 4 from %ir.addr) + ; CHECK: $w2 = COPY [[LDRWroX]] + ; CHECK: RET_ReallyLR implicit $w2 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(p0) = G_GEP %0, %1 + %4:gpr(s32) = G_LOAD %2(p0) :: (load 4 from %ir.addr) + $w2 = COPY %4(s32) + RET_ReallyLR implicit $w2 +... +--- +name: ldrsrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x1 + ; CHECK-LABEL: name: ldrsrox + ; CHECK: liveins: $d0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[COPY1]], 0, 0 :: (load 4 from %ir.addr) + ; CHECK: $s2 = COPY [[LDRSroX]] + ; CHECK: RET_ReallyLR implicit $s2 + %0:gpr(p0) = COPY $d0 + %1:gpr(s64) = COPY $x1 + %2:gpr(p0) = G_GEP %0, %1 + %4:fpr(s32) = G_LOAD %2(p0) :: (load 4 from %ir.addr) + $s2 = COPY %4(s32) + RET_ReallyLR implicit $s2 +... 
+--- +name: ldrhrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: ldrhrox + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[LDRHroX:%[0-9]+]]:fpr16 = LDRHroX [[COPY]], [[COPY1]], 0, 0 :: (load 2 from %ir.addr) + ; CHECK: $h2 = COPY [[LDRHroX]] + ; CHECK: RET_ReallyLR implicit $h2 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(p0) = G_GEP %0, %1 + %4:fpr(s16) = G_LOAD %2(p0) :: (load 2 from %ir.addr) + $h2 = COPY %4(s16) + RET_ReallyLR implicit $h2 +... +--- +name: ldbbrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: ldbbrox + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[LDRBBroX:%[0-9]+]]:gpr32 = LDRBBroX [[COPY]], [[COPY1]], 0, 0 :: (load 1 from %ir.addr) + ; CHECK: $w2 = COPY [[LDRBBroX]] + ; CHECK: RET_ReallyLR implicit $w2 + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(p0) = G_GEP %0, %1 + %4:gpr(s32) = G_LOAD %2(p0) :: (load 1 from %ir.addr) + $w2 = COPY %4(s32) + RET_ReallyLR implicit $w2 +... 
+--- +name: ldrqrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x1 + ; CHECK-LABEL: name: ldrqrox + ; CHECK: liveins: $d0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY]], [[COPY1]], 0, 0 :: (load 16 from %ir.addr) + ; CHECK: $q0 = COPY [[LDRQroX]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:gpr(p0) = COPY $d0 + %1:gpr(s64) = COPY $x1 + %2:gpr(p0) = G_GEP %0, %1 + %4:fpr(<2 x s64>) = G_LOAD %2(p0) :: (load 16 from %ir.addr) + $q0 = COPY %4(<2 x s64>) + RET_ReallyLR implicit $q0 Index: llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir @@ -0,0 +1,168 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define void @strxrox(i64* %addr) { ret void } + define void @strdrox(i64* %addr) { ret void } + define void @strwrox(i64* %addr) { ret void } + define void @strsrox(i64* %addr) { ret void } + define void @strhrox(i64* %addr) { ret void } + define void @strqrox(i64* %addr) { ret void } + define void @shl(i64* %addr) { ret void } +... 
+ +--- +name: strxrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: strxrox + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK: STRXroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 8 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %ptr:gpr(p0) = G_GEP %0, %1 + %3:gpr(s64) = COPY $x2 + G_STORE %3, %ptr :: (store 8 into %ir.addr) +... +--- +name: strdrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $d2 + ; CHECK-LABEL: name: strdrox + ; CHECK: liveins: $x0, $x1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: STRDroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 8 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %ptr:gpr(p0) = G_GEP %0, %1 + %3:fpr(s64) = COPY $d2 + G_STORE %3, %ptr :: (store 8 into %ir.addr) +... +--- +name: strwrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $w2 + ; CHECK-LABEL: name: strwrox + ; CHECK: liveins: $x0, $x1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 + ; CHECK: STRWroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 4 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %ptr:gpr(p0) = G_GEP %0, %1 + %3:gpr(s32) = COPY $w2 + G_STORE %3, %ptr :: (store 4 into %ir.addr) +... 
+--- +name: strsrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $s2 + ; CHECK-LABEL: name: strsrox + ; CHECK: liveins: $x0, $x1, $s2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s2 + ; CHECK: STRSroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 4 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %ptr:gpr(p0) = G_GEP %0, %1 + %3:fpr(s32) = COPY $s2 + G_STORE %3, %ptr :: (store 4 into %ir.addr) +... +--- +name: strhrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $h0 + ; CHECK-LABEL: name: strhrox + ; CHECK: liveins: $x0, $x1, $h0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0 + ; CHECK: STRHroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 2 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %ptr:gpr(p0) = G_GEP %0, %1 + %3:fpr(s16) = COPY $h0 + G_STORE %3, %ptr :: (store 2 into %ir.addr) +... +--- +name: strqrox +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $q2 + ; CHECK-LABEL: name: strqrox + ; CHECK: liveins: $x0, $x1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: STRQroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 16 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %ptr:gpr(p0) = G_GEP %0, %1 + %2:fpr(<2 x s64>) = COPY $q2 + G_STORE %2, %ptr :: (store 16 into %ir.addr) +... 
+--- +name: shl +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: shl + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK: STRXroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store 8 into %ir.addr) + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 3 + %2:gpr(s64) = G_SHL %0, %1(s64) + %3:gpr(p0) = COPY $x1 + %ptr:gpr(p0) = G_GEP %3, %2 + %4:gpr(s64) = COPY $x2 + G_STORE %4, %ptr :: (store 8 into %ir.addr) Index: llvm/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll +++ llvm/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll @@ -1,6 +1,6 @@ ; fastisel should not fold add with non-pointer bitwidth ; sext(a) + sext(b) != sext(a + b) -; RUN: llc -mtriple=arm64-apple-darwin %s -O0 -o - | FileCheck %s +; RUN: llc -fast-isel -mtriple=arm64-apple-darwin %s -O0 -o - | FileCheck %s define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp { entry: