diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -135,6 +135,10 @@
   bool matchSextTruncSextLoad(MachineInstr &MI);
   bool applySextTruncSextLoad(MachineInstr &MI);
 
+  /// Match sext_inreg(load p), imm -> sextload p
+  bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
+  bool applySextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
+
   bool matchElideBrByInvertingCond(MachineInstr &MI);
   void applyElideBrByInvertingCond(MachineInstr &MI);
   bool tryElideBrByInvertingCond(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -132,6 +132,13 @@
          [{ return Helper.matchSextTruncSextLoad(*${d}); }]),
   (apply [{ Helper.applySextTruncSextLoad(*${d}); }])>;
 
+def sext_inreg_of_load_matchdata : GIDefMatchData<"std::tuple<Register, unsigned>">;
+def sext_inreg_of_load : GICombineRule<
+  (defs root:$root, sext_inreg_of_load_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SEXT_INREG):$root,
+         [{ return Helper.matchSextInRegOfLoad(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applySextInRegOfLoad(*${root}, ${matchinfo}); }])>;
+
 def combine_indexed_load_store : GICombineRule<
   (defs root:$root, indexed_load_store_matchdata:$matchinfo),
   (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root,
@@ -311,4 +318,4 @@
     combines_for_extload, combine_indexed_load_store, undef_combines,
     identity_combines, simplify_add_to_sub,
     hoist_logic_op_with_same_opcode_hands,
-    shl_ashr_to_sext_inreg]>;
+    shl_ashr_to_sext_inreg, sext_inreg_of_load]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,6 +16,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -614,6 +615,67 @@
   return true;
 }
 
+bool CombinerHelper::matchSextInRegOfLoad(
+    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+  // Only supports scalars for now.
+  if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+    return false;
+
+  Register SrcReg = MI.getOperand(1).getReg();
+  MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
+  if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
+    return false;
+
+  // If the sign extend extends from a narrower width than the load's width,
+  // then we can narrow the load width when we combine to a G_SEXTLOAD.
+  auto &MMO = **LoadDef->memoperands_begin();
+  // Don't do this for non-simple loads.
+  if (MMO.isAtomic() || MMO.isVolatile())
+    return false;
+
+  // Avoid widening the load at all.
+  unsigned NewSizeBits =
+      std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
+
+  // Don't generate G_SEXTLOADs with a < 1 byte width.
+  if (NewSizeBits < 8)
+    return false;
+  // Don't bother creating a non-power-2 sextload, it will likely be broken up
+  // anyway for most targets.
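+  // (Illustration, mirroring the non_pow_2_inreg test below: (load 4) +
+  // G_SEXT_INREG 24 would give NewSizeBits == 24, and an s24 G_SEXTLOAD is
+  // not worth forming.)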
+  if (!isPowerOf2_32(NewSizeBits))
+    return false;
+  MatchInfo = {LoadDef->getOperand(0).getReg(), NewSizeBits};
+  return true;
+}
+
+bool CombinerHelper::applySextInRegOfLoad(
+    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+  Register LoadReg;
+  unsigned ScalarSizeBits;
+  std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
+  auto *LoadDef = MRI.getVRegDef(LoadReg);
+  assert(LoadDef && "Expected a load reg");
+
+  // If we have the following:
+  // %ld = G_LOAD %ptr, (load 2)
+  // %ext = G_SEXT_INREG %ld, 8
+  //    ==>
+  // %ld = G_SEXTLOAD %ptr (load 1)
+
+  auto &MMO = **LoadDef->memoperands_begin();
+  Builder.setInstrAndDebugLoc(MI);
+  auto &MF = Builder.getMF();
+  auto PtrInfo = MMO.getPointerInfo();
+  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
+  Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
+                         LoadDef->getOperand(1).getReg(), *NewMMO);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
                                             Register &Base, Register &Offset) {
   auto &MF = *MI.getParent()->getParent();
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -477,7 +477,7 @@
 MachineMemOperand *MachineFunction::getMachineMemOperand(
     const MachineMemOperand *MMO, MachinePointerInfo &PtrInfo, uint64_t Size) {
   return new (Allocator) MachineMemOperand(
-      PtrInfo, MMO->getFlags(), Size, Alignment, AAMDNodes(), nullptr,
+      PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), AAMDNodes(), nullptr,
       MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering());
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir
@@ -0,0 +1,103 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name:            sextload_from_inreg
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: sextload_from_inreg
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s16) = G_SEXTLOAD [[COPY]](p0) :: (load 1, align 2)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXTLOAD]](s16)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(s16) = G_LOAD %0(p0) :: (load 2)
+    %2:_(s16) = G_SEXT_INREG %1, 8
+    %3:_(s32) = G_ANYEXT %2(s16)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            non_pow_2_inreg
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: non_pow_2_inreg
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 24
+    ; CHECK: $w0 = COPY [[SEXT_INREG]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = G_LOAD %0(p0) :: (load 4)
+    %2:_(s32) = G_SEXT_INREG %1, 24
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
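+# The next two cases cover the MMO checks in matchSextInRegOfLoad: atomic and
+# volatile loads must not be turned into a narrower G_SEXTLOAD.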
+---
+name:            atomic
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: atomic
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load acquire 2)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[LOAD]], 8
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(s16) = G_LOAD %0(p0) :: (load acquire 2)
+    %2:_(s16) = G_SEXT_INREG %1, 8
+    %3:_(s32) = G_ANYEXT %2(s16)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            volatile
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: volatile
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (volatile load 2)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[LOAD]], 8
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(s16) = G_LOAD %0(p0) :: (volatile load 2)
+    %2:_(s16) = G_SEXT_INREG %1, 8
+    %3:_(s32) = G_ANYEXT %2(s16)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+
+...
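
For illustration, the first test case above exercises both changes at once. The
combine rewrites

    %1:_(s16) = G_LOAD %0(p0) :: (load 2)
    %2:_(s16) = G_SEXT_INREG %1, 8

into

    %2:_(s16) = G_SEXTLOAD %0(p0) :: (load 1, align 2)

and the narrowed memory operand keeps the original base alignment of 2 only
because getMachineMemOperand now copies MMO->getBaseAlign() rather than the
MachineFunction's own Alignment member. This before/after is a sketch derived
from the CHECK lines above, not an additional test.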