diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -72,8 +72,8 @@
   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

   // A lowering phase that runs before any selection attempts.
-
-  void preISelLower(MachineInstr &I) const;
+  // Returns true if the instruction was modified.
+  bool preISelLower(MachineInstr &I);

   // An early selection function that runs before the selectImpl() call.
   bool earlySelect(MachineInstr &I) const;
@@ -81,8 +81,10 @@
   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
-  void contractCrossBankCopyIntoStore(MachineInstr &I,
-                                      MachineRegisterInfo &MRI) const;
+  bool contractCrossBankCopyIntoStore(MachineInstr &I,
+                                      MachineRegisterInfo &MRI);
+
+  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);

   bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;
@@ -1332,7 +1334,7 @@
   return;
 }

-void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
+bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
   MachineBasicBlock &MBB = *I.getParent();
   MachineFunction &MF = *MBB.getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1352,10 +1354,10 @@
     const LLT ShiftTy = MRI.getType(ShiftReg);
     const LLT SrcTy = MRI.getType(SrcReg);
     if (SrcTy.isVector())
-      return;
+      return false;
     assert(!ShiftTy.isVector() && "unexpected vector shift ty");
     if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
-      return;
+      return false;
     auto *AmtMI = MRI.getVRegDef(ShiftReg);
     assert(AmtMI && "could not find a vreg definition for shift amount");
     if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
@@ -1366,14 +1368,62 @@
       MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
       I.getOperand(2).setReg(Trunc.getReg(0));
     }
-    return;
+    return true;
   }
   case TargetOpcode::G_STORE:
-    contractCrossBankCopyIntoStore(I, MRI);
-    return;
+    return contractCrossBankCopyIntoStore(I, MRI);
+  case TargetOpcode::G_PTR_ADD:
+    return convertPtrAddToAdd(I, MRI);
   default:
-    return;
+    return false;
+  }
+}
+
+/// This lowering looks for G_PTR_ADD instructions and converts them to a
+/// standard G_ADD with a G_PTRTOINT on the source and a G_INTTOPTR on the
+/// result. This is OK for address space 0 on AArch64, as p0 can be treated
+/// as s64.
+///
+/// The motivation behind this is to expose the add semantics to the imported
+/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
+/// because the selector works bottom up, uses before defs. By the time we end
+/// up trying to select a G_PTR_ADD, we should have already attempted to fold
+/// it into addressing modes and been unsuccessful.
+bool AArch64InstructionSelector::convertPtrAddToAdd(
+    MachineInstr &I, MachineRegisterInfo &MRI) {
+  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
+  Register DstReg = I.getOperand(0).getReg();
+  Register AddOp1Reg = I.getOperand(1).getReg();
+  Register AddOp2Reg = I.getOperand(2).getReg();
+  const LLT PtrTy = MRI.getType(DstReg);
+  if (PtrTy.getAddressSpace() != 0)
+    return false;
+
+  MachineIRBuilder MIB(I);
+  const LLT s64 = LLT::scalar(64);
+  auto PtrToInt = MIB.buildPtrToInt(s64, AddOp1Reg);
+  auto Add = MIB.buildAdd(s64, PtrToInt, AddOp2Reg);
+  // Set regbanks on the registers.
+  MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+  MRI.setRegBank(Add.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+
+  // Now turn the %dst = G_PTR_ADD %base, off into:
+  // %dst = G_INTTOPTR %Add
+  I.setDesc(TII.get(TargetOpcode::G_INTTOPTR));
+  I.getOperand(1).setReg(Add.getReg(0));
+  I.RemoveOperand(2);
+
+  // We need to manually call select on these because new instructions don't
+  // get added to the selection queue.
+  if (!select(*Add)) {
+    LLVM_DEBUG(dbgs() << "Failed to select G_ADD in convertPtrAddToAdd");
+    return false;
   }
+  if (!select(*PtrToInt)) {
+    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
+    return false;
+  }
+  return true;
 }

 bool AArch64InstructionSelector::earlySelectSHL(
@@ -1411,8 +1461,8 @@
   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
 }

-void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
-    MachineInstr &I, MachineRegisterInfo &MRI) const {
+bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
+    MachineInstr &I, MachineRegisterInfo &MRI) {
   assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
   // If we're storing a scalar, it doesn't matter what register bank that
   // scalar is on. All that matters is the size.
@@ -1430,7 +1480,7 @@
   // And then continue the selection process normally.
   MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
   if (!Def)
-    return;
+    return false;
   Register DefDstReg = Def->getOperand(0).getReg();
   LLT DefDstTy = MRI.getType(DefDstReg);
   Register StoreSrcReg = I.getOperand(0).getReg();
@@ -1439,18 +1489,19 @@
   // If we get something strange like a physical register, then we shouldn't
   // go any further.
   if (!DefDstTy.isValid())
-    return;
+    return false;

   // Are the source and dst types the same size?
   if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
-    return;
+    return false;

   if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
       RBI.getRegBank(DefDstReg, MRI, TRI))
-    return;
+    return false;

   // We have a cross-bank copy, which is entering a store. Let's fold it.
   I.getOperand(0).setReg(DefDstReg);
+  return true;
 }

 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
@@ -1561,7 +1612,9 @@
   // Try to do some lowering before we start instruction selecting. These
   // lowerings are purely transformations on the input G_MIR and so selection
   // must continue after any modification of the instruction.
-  preISelLower(I);
+  if (preISelLower(I)) {
+    Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
+  }

   // There may be patterns where the importer can't deal with them optimally,
   // but does select it to a suboptimal sequence so our custom C++ selection
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
@@ -93,10 +93,11 @@
     ; CHECK: liveins: $x0, $x1
     ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY]], [[COPY1]]
-    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
-    ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[LDRXui]]
+    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY [[ADDXrr]]
+    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
+    ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[LDRXui]]
     ; CHECK: $x0 = COPY [[ADDXrr1]]
     ; CHECK: RET_ReallyLR implicit $x0
     %0:gpr(p0) = COPY $x0
@@ -386,12 +387,13 @@
     ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
+    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[UBFMXri]]
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY [[ADDXrr]]
+    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
     ; CHECK: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0
     ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[ADDXri]]
-    ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
+    ; CHECK: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXrr1]]
     ; CHECK: $x2 = COPY [[ADDXrr2]]
     ; CHECK: RET_ReallyLR implicit $x2
     %0:gpr(s64) = COPY $x0
@@ -456,13 +458,13 @@
     ; CHECK-LABEL: name: more_than_one_use_shl_lsl_slow
     ; CHECK: liveins: $x0, $x1, $x2
     ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[COPY]], 61, 60
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
-    ; CHECK: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
-    ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]]
-    ; CHECK: $x2 = COPY [[ADDXrr1]]
+    ; CHECK: [[ADDXrs:%[0-9]+]]:gpr64 = ADDXrs [[COPY1]], [[COPY]], 3
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[ADDXrs]]
+    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
+    ; CHECK: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[COPY2]], 0 :: (load 8 from %ir.addr)
+    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]]
+    ; CHECK: $x2 = COPY [[ADDXrr]]
     ; CHECK: RET_ReallyLR implicit $x2
     %0:gpr(s64) = COPY $x0
     %1:gpr(s64) = G_CONSTANT i64 3
@@ -493,12 +495,13 @@
     ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60
    ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
-    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[UBFMXri]]
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+    ; CHECK: [[ADDXrs:%[0-9]+]]:gpr64 = ADDXrs [[COPY2]], [[COPY]], 3
     ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
     ; CHECK: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0
-    ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]]
-    ; CHECK: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
-    ; CHECK: $x2 = COPY [[ADDXrr2]]
+    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]]
+    ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrs]], [[ADDXrr]]
+    ; CHECK: $x2 = COPY [[ADDXrr1]]
     ; CHECK: RET_ReallyLR implicit $x2
     %0:gpr(s64) = COPY $x0
     %1:gpr(s64) = G_CONSTANT i64 3
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -10,9 +10,9 @@
     ret void
   }

-  define i8* @gep(i8* %in) { ret i8* undef }
-  define i8* @gep_no_constant(i8* %in) { ret i8* undef }
-  define i8* @gep_bad_imm(i8* %in) { ret i8* undef }
+  define i8* @ptr_add(i8* %in) { ret i8* undef }
+  define i8* @ptr_add_no_constant(i8* %in) { ret i8* undef }
+  define i8* @ptr_add_bad_imm(i8* %in) { ret i8* undef }

   define i8* @ptr_mask(i8* %in) { ret i8* undef }

@@ -53,8 +53,8 @@
 ...

 ---
-# CHECK-LABEL: name: gep
-name: gep
+# CHECK-LABEL: name: ptr_add
+name: ptr_add
 legalized: true
 regBankSelected: true
 registers:
@@ -63,7 +63,7 @@
   - { id: 2, class: gpr }

 # CHECK: body:
-# CHECK: %2:gpr64sp = ADDXri %0, 42, 0
+# CHECK: %{{[0-9]+}}:gpr64sp = ADDXri %{{[0-9]+}}, 42, 0
 body: |
   bb.0:
     liveins: $x0
@@ -74,8 +74,8 @@
 ...

 ---
-# CHECK-LABEL: name: gep_no_constant
-name: gep_no_constant
+# CHECK-LABEL: name: ptr_add_no_constant
+name: ptr_add_no_constant
 legalized: true
 regBankSelected: true
 registers:
@@ -84,9 +84,7 @@
   - { id: 2, class: gpr }

 # CHECK: body:
-# CHECK: %0:gpr64 = COPY $x0
-# CHECK: %1:gpr64 = COPY $x1
-# CHECK: %2:gpr64 = ADDXrr %0, %1
+# CHECK: %{{[0-9]+}}:gpr64 = ADDXrr %{{[0-9]+}}, %{{[0-9]+}}
 body: |
   bb.0:
     liveins: $x0, $x1
@@ -97,8 +95,8 @@
 ...

 ---
-# CHECK-LABEL: name: gep_bad_imm
-name: gep_bad_imm
+# CHECK-LABEL: name: ptr_add_bad_imm
+name: ptr_add_bad_imm
 legalized: true
 regBankSelected: true
 registers:
@@ -108,9 +106,9 @@

 # CHECK: body:
 # CHECK: %0:gpr64 = COPY $x0
-# CHECK: %3:gpr32 = MOVi32imm 10000
-# CHECK: %1:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
-# CHECK: %2:gpr64 = ADDXrr %0, %1
+# CHECK: %5:gpr32 = MOVi32imm 10000
+# CHECK: %1:gpr64 = SUBREG_TO_REG 0, %5, %subreg.sub_32
+# CHECK: %{{[0-9]+}}:gpr64 = ADDXrr %0, %1
 body: |
   bb.0:
     liveins: $x0, $x1
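
For reference, a minimal sketch of the rewrite that convertPtrAddToAdd performs on generic MIR. The virtual register names below are illustrative only; they are not taken from the patch or its tests.

  ; Before the pre-isel lowering, after regbankselect:
  %dst:gpr(p0) = G_PTR_ADD %base, %off(s64)

  ; After the pre-isel lowering. The G_PTRTOINT and G_ADD are selected
  ; manually inside convertPtrAddToAdd; the G_INTTOPTR (the mutated
  ; G_PTR_ADD) continues through the normal selection flow.
  %int:gpr(s64) = G_PTRTOINT %base(p0)
  %sum:gpr(s64) = G_ADD %int, %off
  %dst:gpr(p0) = G_INTTOPTR %sum(s64)

This only fires for address space 0, where p0 can be treated as s64, and it is what exposes the plain add to the imported ADDXrr/ADDXrs patterns seen in the updated tests above.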