Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -88,6 +88,10 @@
   void processPHIs(MachineFunction &MF);
   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  /// Try to select shift instruction \p I as a variable shift/rotate
+  /// instruction when something in the shift amount can be folded away.
+  bool earlySelectShiftAmountMod(MachineInstr &I,
+                                 MachineRegisterInfo &MRI) const;
 
   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
   bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                       MachineRegisterInfo &MRI);
@@ -1670,6 +1674,78 @@
   return true;
 }
 
+bool AArch64InstructionSelector::earlySelectShiftAmountMod(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  // Given a situation like this:
+  //
+  //   %shiftamt = G_BINOP %something, %cst
+  //   %x = G_SHIFT %reg, %shiftamt
+  //
+  // It may be possible to fold the binop into the instruction and produce a
+  // variable shift.
+  unsigned GenericOpc = I.getOpcode();
+  assert((GenericOpc == TargetOpcode::G_SHL ||
+          GenericOpc == TargetOpcode::G_ASHR ||
+          GenericOpc == TargetOpcode::G_LSHR) &&
+         "Unexpected opcode?");
+  MachineIRBuilder MIB(I);
+  MachineOperand &DstOp = I.getOperand(0);
+  LLT Ty = MRI.getType(DstOp.getReg());
+  unsigned Size = Ty.getSizeInBits();
+  if (Ty.isVector() || !(Size == 32 || Size == 64))
+    return false;
+
+  // Walk past truncs/extends of the shift amount.
+  Register ShiftAmt = I.getOperand(2).getReg();
+  mi_match(ShiftAmt, MRI,
+           m_any_of(m_GZExt(m_Reg(ShiftAmt)), m_GAnyExt(m_Reg(ShiftAmt)),
+                    m_GTrunc(m_Reg(ShiftAmt))));
+
+  // TODO: Handle G_ADD and G_SUB.
+  //
+  // Try to match a situation like this:
+  //
+  //   %cst = G_CONSTANT i32 31
+  //   %shiftamt = G_AND %and_lhs, %cst
+  //   %x = G_SHL %shl_lhs, %shiftamt
+  //
+  // We can avoid producing the G_AND entirely by producing only:
+  //
+  //   %x = LSLVWr %shl_lhs, %and_lhs
+  //
+  // We can do this because LSLVWr only reads the shift amount from the
+  // bottom 5 bits of %and_lhs. The mask in this case, 31, cannot change
+  // the value of those bottom 5 bits (31 = 0b11111), so the G_AND has no
+  // effect on the shift.
+  MachineInstr *ShiftAmtDef = getOpcodeDef(TargetOpcode::G_AND, ShiftAmt, MRI);
+  if (!ShiftAmtDef)
+    return false;
+  int64_t MaskImm;
+  if (!mi_match(ShiftAmtDef->getOperand(2).getReg(), MRI, m_ICst(MaskImm)))
+    return false;
+  uint64_t Bits = Size == 32 ? 5 : 6;
+  if (countTrailingOnes(static_cast<uint64_t>(MaskImm)) < Bits)
+    return false;
+
+  // Narrow/widen the shift amount to match the size of the shift operation.
+  Register NewShiftAmt = ShiftAmtDef->getOperand(1).getReg();
+  NewShiftAmt = Size == 32 ? narrowExtendRegIfNeeded(NewShiftAmt, MIB)
+                           : widenGPRBankRegIfNeeded(NewShiftAmt, 64, MIB);
+
+  const unsigned OpcTable[3][2] = {{AArch64::LSLVWr, AArch64::LSLVXr},
+                                   {AArch64::ASRVWr, AArch64::ASRVXr},
+                                   {AArch64::LSRVWr, AArch64::LSRVXr}};
+  unsigned ShiftIdx = GenericOpc == TargetOpcode::G_SHL
+                          ? 0
+                          : GenericOpc == TargetOpcode::G_ASHR ? 1 : 2;
+  unsigned NewOpc = OpcTable[ShiftIdx][Size == 64];
+  auto NewInst =
+      MIB.buildInstr(NewOpc, {DstOp}, {I.getOperand(1), NewShiftAmt});
+  I.eraseFromParent();
+  constrainSelectedInstRegOperands(*NewInst, TII, TRI, RBI);
+  return true;
+}
+
 bool AArch64InstructionSelector::earlySelectSHL(
     MachineInstr &I, MachineRegisterInfo &MRI) const {
   // We try to match the immediate variant of LSL, which is actually an alias
@@ -1768,7 +1844,12 @@
     return true;
   }
   case TargetOpcode::G_SHL:
-    return earlySelectSHL(I, MRI);
+    if (earlySelectSHL(I, MRI))
+      return true;
+    LLVM_FALLTHROUGH;
+  case TargetOpcode::G_ASHR:
+  case TargetOpcode::G_LSHR:
+    return earlySelectShiftAmountMod(I, MRI);
   case TargetOpcode::G_CONSTANT: {
     bool IsZero = false;
     if (I.getOperand(1).isCImm())
Index: llvm/test/CodeGen/AArch64/GlobalISel/opt-shift-amount-mod.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/opt-shift-amount-mod.mir
@@ -0,0 +1,306 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
+
+...
+---
+name: shl_and_s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: shl_and_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: %shift:gpr32 = LSLVWr %shift_lhs, %binop_lhs
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s32) = G_CONSTANT i32 31
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %shift:gpr(s32) = G_SHL %shift_lhs, %binop(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: shl_and_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: shl_and_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %binop_lhs:gpr64 = COPY $x0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: %shift:gpr64 = LSLVXr %shift_lhs, %binop_lhs
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s64) = COPY $x0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s64) = G_CONSTANT i64 63
+    %binop:gpr(s64) = G_AND %binop_lhs, %cst
+    %shift:gpr(s64) = G_SHL %shift_lhs, %binop(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: ashr_and_s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: ashr_and_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: %shift:gpr32 = ASRVWr %shift_lhs, %binop_lhs
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s32) = G_CONSTANT i32 31
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %shift:gpr(s32) = G_ASHR %shift_lhs, %binop(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: ashr_and_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: ashr_and_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %binop_lhs:gpr64 = COPY $x0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: %shift:gpr64 = ASRVXr %shift_lhs, %binop_lhs
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s64) = COPY $x0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s64) = G_CONSTANT i64 63
+    %binop:gpr(s64) = G_AND %binop_lhs, %cst
+    %shift:gpr(s64) = G_ASHR %shift_lhs, %binop(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: lshr_and_s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: lshr_and_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: %shift:gpr32 = LSRVWr %shift_lhs, %binop_lhs
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s32) = G_CONSTANT i32 31
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %shift:gpr(s32) = G_LSHR %shift_lhs, %binop(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: lshr_and_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: lshr_and_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %binop_lhs:gpr64 = COPY $x0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: %shift:gpr64 = LSRVXr %shift_lhs, %binop_lhs
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s64) = COPY $x0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s64) = G_CONSTANT i64 63
+    %binop:gpr(s64) = G_AND %binop_lhs, %cst
+    %shift:gpr(s64) = G_LSHR %shift_lhs, %binop(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: bad_and_mask
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; The G_AND can modify the bottom 5 bits used by LSLVWr. The G_AND should
+    ; not be folded away.
+    ;
+    ; CHECK-LABEL: name: bad_and_mask
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: %binop:gpr32common = ANDWri %binop_lhs, 1921
+    ; CHECK: %shift:gpr32 = LSLVWr %shift_lhs, %binop
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s32) = G_CONSTANT i32 12
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %shift:gpr(s32) = G_SHL %shift_lhs, %binop(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: walk_past_zext
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $x1
+    ; CHECK-LABEL: name: walk_past_zext
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: %binop_lhs:gpr32all = COPY $w0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %binop_lhs, %subreg.sub_32
+    ; CHECK: %shift:gpr64 = LSLVXr %shift_lhs, [[SUBREG_TO_REG]]
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s32) = G_CONSTANT i32 63
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %ext_and:gpr(s64) = G_ZEXT %binop(s32)
+    %shift:gpr(s64) = G_SHL %shift_lhs, %ext_and(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: walk_past_anyext
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $x1
+    ; CHECK-LABEL: name: walk_past_anyext
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: %binop_lhs:gpr32all = COPY $w0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %binop_lhs, %subreg.sub_32
+    ; CHECK: %shift:gpr64 = LSLVXr %shift_lhs, [[SUBREG_TO_REG]]
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s32) = G_CONSTANT i32 63
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %ext_and:gpr(s64) = G_ANYEXT %binop(s32)
+    %shift:gpr(s64) = G_SHL %shift_lhs, %ext_and(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_walk_past_sext
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $x1
+    ; CHECK-LABEL: name: dont_walk_past_sext
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: %binop:gpr32common = ANDWri %binop_lhs, 5
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %binop, %subreg.sub_32
+    ; CHECK: %shift:gpr64 = LSLVXr %shift_lhs, [[SUBREG_TO_REG]]
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s32) = G_CONSTANT i32 63
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %ext_and:gpr(s64) = G_SEXT %binop(s32)
+    %shift:gpr(s64) = G_SHL %shift_lhs, %ext_and(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: walk_past_trunc
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: walk_past_trunc
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %binop_lhs:gpr64all = COPY $x0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %binop_lhs.sub_32
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; CHECK: %shift:gpr32 = LSLVWr %shift_lhs, [[COPY1]]
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s64) = COPY $x0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s64) = G_CONSTANT i64 31
+    %binop:gpr(s64) = G_AND %binop_lhs, %cst
+    %binop_trunc:gpr(s32) = G_TRUNC %binop(s64)
+    %shift:gpr(s32) = G_SHL %shift_lhs, %binop_trunc(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: negative_value
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; We should be able to handle an all-ones value here.
+    ;
+    ; CHECK-LABEL: name: negative_value
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: %shift:gpr32 = LSLVWr %shift_lhs, %binop_lhs
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s32) = G_CONSTANT i32 -1
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %shift:gpr(s32) = G_SHL %shift_lhs, %binop(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
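---

A quick end-to-end sanity check of the fold at the IR level. This is a sketch and not part of the patch; the function name, RUN line, and exact CHECK lines are illustrative. Because LSLV/ASRV/LSRV take the shift amount modulo the register width, the mask should disappear from the selected code:

  ; RUN: llc -mtriple=aarch64-unknown-unknown -global-isel -o - %s | FileCheck %s
  ; CHECK-LABEL: shl_mod:
  ; CHECK-NOT:   and
  ; CHECK:       lsl w0, w0, w1
  define i32 @shl_mod(i32 %val, i32 %amt) {
    ; Masking with 31 cannot change the low 5 bits that LSLVWr reads, so the
    ; fold should apply and no separate 'and' should be emitted.
    %mask = and i32 %amt, 31
    %res = shl i32 %val, %mask
    ret i32 %res
  }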