diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -4125,21 +4125,52 @@
   if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
     return None;

-  // Now try to match the G_SHL.
-  MachineInstr *Shl =
-      getOpcodeDef(TargetOpcode::G_SHL, Gep->getOperand(2).getReg(), MRI);
-  if (!Shl || !isWorthFoldingIntoExtendedReg(*Shl, MRI))
+  // Now, try to match an instruction which can compute our specific offset.
+  // We want a G_SHL or a G_MUL.
+  MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
+  if (!OffsetInst)
     return None;

-  // Now, try to find the specific G_CONSTANT.
-  auto ValAndVReg =
-      getConstantVRegValWithLookThrough(Shl->getOperand(2).getReg(), MRI);
-  if (!ValAndVReg)
+  unsigned OffsetOpc = OffsetInst->getOpcode();
+  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
     return None;

+  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+    return None;
+
+  // Now, try to find the specific G_CONSTANT. Start by assuming that the
+  // register we will offset is the LHS, and the register containing the
+  // constant is the RHS.
+  Register OffsetReg = OffsetInst->getOperand(1).getReg();
+  Register ConstantReg = OffsetInst->getOperand(2).getReg();
+  auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+  if (!ValAndVReg) {
+    // We didn't get a constant on the RHS. If the opcode is a shift, then
+    // we're done.
+    if (OffsetOpc == TargetOpcode::G_SHL)
+      return None;
+
+    // If we have a G_MUL, we can use either register. Try looking at the RHS.
+    std::swap(OffsetReg, ConstantReg);
+    ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+    if (!ValAndVReg)
+      return None;
+  }
+
   // The value must fit into 3 bits, and must be positive. Make sure that is
   // true.
   int64_t ImmVal = ValAndVReg->Value;
+
+  // Since we're going to pull this into a shift, the constant value must be
+  // a power of 2. If we got a multiply, then we need to check this.
+  if (OffsetOpc == TargetOpcode::G_MUL) {
+    if (!isPowerOf2_32(ImmVal))
+      return None;
+
+    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
+    ImmVal = Log2_32(ImmVal);
+  }
+
   if ((ImmVal & 0x7) != ImmVal)
     return None;

@@ -4152,7 +4183,7 @@
   // offset. Signify that we are shifting by setting the shift flag to 1.
   return {{
       [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
-      [=](MachineInstrBuilder &MIB) { MIB.add(Shl->getOperand(1)); },
+      [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
       [=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
   }};
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
@@ -7,6 +7,12 @@
   define void @more_than_one_use(i64* %addr) { ret void }
   define void @ldrxrox_shl(i64* %addr) { ret void }
   define void @ldrdrox_shl(i64* %addr) { ret void }
+  define void @ldrxrox_mul_rhs(i64* %addr) { ret void }
+  define void @ldrdrox_mul_rhs(i64* %addr) { ret void }
+  define void @ldrxrox_mul_lhs(i64* %addr) { ret void }
+  define void @ldrdrox_mul_lhs(i64* %addr) { ret void }
+  define void @mul_not_pow_2(i64* %addr) { ret void }
+  define void @mul_wrong_pow_2(i64* %addr) { ret void }
   define void @more_than_one_use_shl_1(i64* %addr) { ret void }
   define void @more_than_one_use_shl_2(i64* %addr) { ret void }
   define void @more_than_one_use_shl_lsl_fast(i64* %addr) #1 { ret void }
@@ -154,6 +160,176 @@
 ...
 ---
+name: ldrxrox_mul_rhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: ldrxrox_mul_rhs
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+    ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
+    ; CHECK: $x2 = COPY [[LDRXroX]]
+    ; CHECK: RET_ReallyLR implicit $x2
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 8
+    %2:gpr(s64) = G_MUL %0, %1(s64)
+    %3:gpr(p0) = COPY $x1
+    %4:gpr(p0) = G_GEP %3, %2
+    %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+    $x2 = COPY %5(s64)
+    RET_ReallyLR implicit $x2
+
+...
+---
+name: ldrdrox_mul_rhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0, $x1, $d2
+    ; CHECK-LABEL: name: ldrdrox_mul_rhs
+    ; CHECK: liveins: $x0, $x1, $d2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+    ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
+    ; CHECK: $d2 = COPY [[LDRDroX]]
+    ; CHECK: RET_ReallyLR implicit $d2
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 8
+    %2:gpr(s64) = G_MUL %0, %1(s64)
+    %3:gpr(p0) = COPY $x1
+    %4:gpr(p0) = G_GEP %3, %2
+    %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+    $d2 = COPY %5(s64)
+    RET_ReallyLR implicit $d2
+
+...
+---
+name: ldrxrox_mul_lhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: ldrxrox_mul_lhs
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+    ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
+    ; CHECK: $x2 = COPY [[LDRXroX]]
+    ; CHECK: RET_ReallyLR implicit $x2
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 8
+    %2:gpr(s64) = G_MUL %1, %0(s64)
+    %3:gpr(p0) = COPY $x1
+    %4:gpr(p0) = G_GEP %3, %2
+    %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+    $x2 = COPY %5(s64)
+    RET_ReallyLR implicit $x2
+
+...
+---
+name: ldrdrox_mul_lhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0, $x1, $d2
+    ; CHECK-LABEL: name: ldrdrox_mul_lhs
+    ; CHECK: liveins: $x0, $x1, $d2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+    ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
+    ; CHECK: $d2 = COPY [[LDRDroX]]
+    ; CHECK: RET_ReallyLR implicit $d2
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 8
+    %2:gpr(s64) = G_MUL %1, %0(s64)
+    %3:gpr(p0) = COPY $x1
+    %4:gpr(p0) = G_GEP %3, %2
+    %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+    $d2 = COPY %5(s64)
+    RET_ReallyLR implicit $d2
+
+...
+---
+name: mul_not_pow_2
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    ; Show that we don't get a shifted load from a mul when the constant is
+    ; not a power of 2. (The shift flag on the load stays 0.)
+    liveins: $x0, $x1, $d2
+    ; CHECK-LABEL: name: mul_not_pow_2
+    ; CHECK: liveins: $x0, $x1, $d2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 7
+    ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[MOVi64imm]], [[COPY]], $xzr
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+    ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load 8 from %ir.addr)
+    ; CHECK: $d2 = COPY [[LDRDroX]]
+    ; CHECK: RET_ReallyLR implicit $d2
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 7
+    %2:gpr(s64) = G_MUL %1, %0(s64)
+    %3:gpr(p0) = COPY $x1
+    %4:gpr(p0) = G_GEP %3, %2
+    %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+    $d2 = COPY %5(s64)
+    RET_ReallyLR implicit $d2
+
+...
+---
+name: mul_wrong_pow_2
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    ; Show that we don't get a shifted load from a mul with the wrong power of
+    ; 2: an 8-byte load can only fold a shift by 3. (The shift flag stays 0.)
+    liveins: $x0, $x1, $d2
+    ; CHECK-LABEL: name: mul_wrong_pow_2
+    ; CHECK: liveins: $x0, $x1, $d2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 16
+    ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[MOVi64imm]], [[COPY]], $xzr
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+    ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load 8 from %ir.addr)
+    ; CHECK: $d2 = COPY [[LDRDroX]]
+    ; CHECK: RET_ReallyLR implicit $d2
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 16
+    %2:gpr(s64) = G_MUL %1, %0(s64)
+    %3:gpr(p0) = COPY $x1
+    %4:gpr(p0) = G_GEP %3, %2
+    %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
+    $d2 = COPY %5(s64)
+    RET_ReallyLR implicit $d2
+
+...
+---
 name: more_than_one_use_shl_1
 alignment: 2
 legalized: true
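
Note on the fold above: a G_MUL only becomes a shifted-register offset when
its constant operand is a power of 2 whose log2 fits the 3-bit scale field
and, judging by the mul_wrong_pow_2 test (that check lives in context elided
from this hunk), equals log2 of the access size. Below is a minimal
standalone sketch of that decision, under a hypothetical helper name; it is
illustrative only, not patch code. The patch itself does this inline with
isPowerOf2_32/Log2_32 from llvm/Support/MathExtras.h.

#include <cstdint>
#include <optional>

// Hypothetical model of the constant check added by this patch.
// Returns the shift amount to encode, or nullopt if no fold is possible.
// For a G_SHL, Imm is already the shift amount; for a G_MUL it is the
// multiplier.
std::optional<int64_t> foldableShiftAmount(bool IsMul, int64_t Imm,
                                           int64_t AccessSizeInBytes) {
  if (IsMul) {
    // A multiply becomes a shift only for a positive power-of-2 constant;
    // the shift amount is then log2 of that constant.
    if (Imm <= 0 || (Imm & (Imm - 1)) != 0)
      return std::nullopt;
    int64_t Shift = 0;
    while ((int64_t{1} << Shift) != Imm)
      ++Shift;
    Imm = Shift;
  }
  // Mirror the patch's check (ImmVal & 0x7) != ImmVal: the shift amount
  // must be positive and fit in 3 bits.
  if ((Imm & 0x7) != Imm)
    return std::nullopt;
  // Assumption inferred from the tests: the register-offset addressing mode
  // scales by the access size, so only Imm == log2(size) folds. This is why
  // mul_wrong_pow_2 (multiply by 16 feeding an 8-byte load) keeps a
  // MADDXrrr and an unshifted LDRDroX.
  if ((int64_t{1} << Imm) != AccessSizeInBytes)
    return std::nullopt;
  return Imm;
}

Under this model, a multiply by 8 feeding an 8-byte load yields shift 3,
which the tests encode as the trailing "0, 1" operands on LDRXroX/LDRDroX
(the final 1 being the shift flag named in the patch comment); a multiply by
7 fails the power-of-2 test and a multiply by 16 fails the size check,
matching the mul_not_pow_2 and mul_wrong_pow_2 tests.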