Index: llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -25,6 +25,18 @@
 //    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
 //    operand are set to zero.
 //
+// 3. Try to fold lsl + ldr/str
+//
+//    ldr/str can be folded with `lsl 2` for 32 bit and `lsl 3` for 64 bit as
+//    below.
+//
+//    %7:gpr64 = UBFMXri killed %6:gpr64, 62, 61
+//    %8:gpr32 = LDRWroX %1:gpr64common, %7:gpr64, 0, 0
+//    STRWroX killed %9:gpr32, %1:gpr64common, %7:gpr64, 0, 0
+//    ==>
+//    %8:gpr32 = LDRWroX %1:gpr64common, %6:gpr64, 0, 1
+//    STRWroX killed %9:gpr32, %1:gpr64common, %6:gpr64, 0, 1
+//
 //===----------------------------------------------------------------------===//
 
 #include "AArch64ExpandImm.h"
@@ -56,6 +68,9 @@
                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
   bool visitORR(MachineInstr &MI,
                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
+  bool visitLDRSTR(MachineInstr &MI,
+                   SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+                   bool is32Bit);
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   StringRef getPassName() const override {
@@ -251,6 +266,66 @@
   return true;
 }
 
+/// Fold a left shift of the offset register into a register-offset load/store.
+///
+/// \p MI is an LDR/STR in [base, Xm{, lsl #amount}] form whose operands are
+/// (Rt, base, offset, sign-extend flag, shift flag). When the offset is
+/// produced by `UBFMXri src, immr, imms` encoding `lsl #2` (32-bit access) or
+/// `lsl #3` (64-bit access), \p MI is rewritten to use `src` directly with the
+/// shift flag set. The shift is queued in \p ToBeRemoved once it has no
+/// remaining uses.
+///
+/// \returns true if \p MI was rewritten.
+bool AArch64MIPeepholeOpt::visitLDRSTR(
+    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+    bool is32Bit) {
+  // The addressing mode can scale the offset only once. If the shift flag is
+  // already set, folding another lsl would silently drop one of the shifts.
+  if (MI.getOperand(4).getImm() != 0)
+    return false;
+
+  Register SrcReg = MI.getOperand(2).getReg();
+  MachineInstr *SrcMI = MRI->getUniqueVRegDef(SrcReg);
+  if (!SrcMI)
+    return false;
+
+  if (SrcMI->getOpcode() != AArch64::UBFMXri)
+    return false;
+
+  // `lsl #n` on a 64-bit register is UBFM with immr = 64 - n, imms = 63 - n.
+  // The access size fixes the only shift the addressing mode can express:
+  // lsl 2 for 32bit ldr/str and lsl 3 for 64bit ldr/str.
+  const unsigned ShiftAmt = is32Bit ? 2 : 3;
+  unsigned Immr = SrcMI->getOperand(2).getImm();
+  unsigned Imms = SrcMI->getOperand(3).getImm();
+  if (Immr != 64 - ShiftAmt || Imms != 63 - ShiftAmt)
+    return false;
+
+  // A sub-register read cannot be carried over by setReg() below.
+  const MachineOperand &ShiftedMO = SrcMI->getOperand(1);
+  if (ShiftedMO.getSubReg())
+    return false;
+
+  // Update ldr/str with lsl.
+  Register ShiftedReg = ShiftedMO.getReg();
+  MI.getOperand(2).setReg(ShiftedReg);
+  MI.getOperand(3).setImm(0);
+  MI.getOperand(4).setImm(1);
+
+  // ShiftedReg is now live up to MI, so a kill flag on an earlier use (for
+  // example on the shift itself, if it stays) would be stale.
+  MRI->clearKillFlags(ShiftedReg);
+
+  LLVM_DEBUG(dbgs() << "Folded lsl into: " << MI << "\n");
+
+  if (MRI->use_empty(SrcReg)) {
+    ToBeRemoved.insert(SrcMI);
+    LLVM_DEBUG(dbgs() << "Removed: " << *SrcMI << "\n");
+  }
+
+  return true;
+}
+
 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -278,6 +353,15 @@
         break;
       case AArch64::ORRWrs:
         Changed = visitORR(MI, ToBeRemoved);
+        break;
+      case AArch64::STRWroX:
+      case AArch64::LDRWroX:
+        Changed |= visitLDRSTR(MI, ToBeRemoved, true /* is32Bit */);
+        break;
+      case AArch64::STRXroX:
+      case AArch64::LDRXroX:
+        Changed |= visitLDRSTR(MI, ToBeRemoved, false /* is32Bit */);
+        break;
       }
     }
   }
Index: llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
===================================================================
--- llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -134,9 +134,8 @@
 ; CHECK-NEXT:    b.hs .LBB7_1
 ; CHECK-NEXT:  // %bb.3: // %if.then
 ; CHECK-NEXT:    // in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    lsl x10, x8, #3
-; CHECK-NEXT:    ldr x11, [x1, x10]
-; CHECK-NEXT:    str x11, [x2, x10]
+; CHECK-NEXT:    ldr x10, [x1, x8, lsl #3]
+; CHECK-NEXT:    str x10, [x2, x8, lsl #3]
 ; CHECK-NEXT:    b .LBB7_1
 ; CHECK-NEXT:  .LBB7_4: // %exit
 ; CHECK-NEXT:    ret
Index: llvm/test/CodeGen/AArch64/extract-bits.ll
===================================================================
--- llvm/test/CodeGen/AArch64/extract-bits.ll
+++ llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -972,10 +972,9 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x1]
 ; CHECK-NEXT:    ubfx x8, x8, #21, #10
-; CHECK-NEXT:    lsl x8, x8, #2
-; CHECK-NEXT:    ldr w9, [x0, x8]
+; CHECK-NEXT:    ldr w9, [x0, x8, lsl #2]
 ; CHECK-NEXT:    add w9, w9, #1
-; CHECK-NEXT:    str w9, [x0, x8]
+; CHECK-NEXT:    str w9, [x0, x8, lsl #2]
 ; CHECK-NEXT:    ret
   %tmp = load i64, i64* %a1, align 8
   %tmp1 = lshr i64 %tmp, 21