diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -107,6 +107,9 @@ bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); + bool matchSextAlreadyExtended(MachineInstr &MI); + bool applySextAlreadyExtended(MachineInstr &MI); + bool matchElideBrByInvertingCond(MachineInstr &MI); void applyElideBrByInvertingCond(MachineInstr &MI); bool tryElideBrByInvertingCond(MachineInstr &MI); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -125,6 +125,12 @@ (apply [{ Helper.applyCombineExtendingLoads(*${root}, ${matchinfo}); }])>; def combines_for_extload: GICombineGroup<[extending_loads]>; +def sext_already_extended : GICombineRule< + (defs root:$d), + (match (wip_match_opcode G_SEXT_INREG):$d, + [{ return Helper.matchSextAlreadyExtended(*${d}); }]), + (apply [{ Helper.applySextAlreadyExtended(*${d}); }])>; + def combine_indexed_load_store : GICombineRule< (defs root:$root, indexed_load_store_matchdata:$matchinfo), (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -576,6 +576,24 @@ return isPredecessor(DefMI, UseMI); } +bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Register SrcReg = MI.getOperand(1).getReg(); + unsigned SrcSignBits = KB->computeNumSignBits(SrcReg); + unsigned NumSextBits = + MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() - + MI.getOperand(2).getImm(); + return SrcSignBits >= NumSextBits; +} + +bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + MachineIRBuilder MIB(MI); + MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, Register &Offset) { auto &MF = *MI.getParent()->getParent(); diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -11,6 +11,7 @@ // //===------------------ #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -441,6 +442,16 @@ unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; } + case TargetOpcode::G_SEXTLOAD: { + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + // TODO: add vector support + if (Ty.isVector()) + break; + if (MI.hasOneMemOperand()) + return Ty.getSizeInBits() - (*MI.memoperands_begin())->getSizeInBits(); + break; + } case TargetOpcode::G_TRUNC: { Register Src = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(Src); diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -79,6 +79,6 @@ def AArch64PostLegalizerCombinerHelper : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper", [erase_undef_store, combines_for_extload, - shuffle_vector_pseudos]> { + sext_already_extended, shuffle_vector_pseudos]> { let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule"; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-trunc-sextload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-trunc-sextload.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-trunc-sextload.mir @@ -0,0 +1,81 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- +name: test_combine_sext_trunc_of_sextload +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: test_combine_sext_trunc_of_sextload + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SEXTLOAD]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: $w0 = COPY [[COPY1]](s32) + %0:_(p0) = COPY $x0 + %1:_(s64) = G_SEXTLOAD %0:_(p0) :: (load 2) + %2:_(s32) = G_TRUNC %1:_(s64) + %3:_(s32) = G_SEXT_INREG %2:_(s32), 16 + $w0 = COPY %3(s32) +... +--- +name: test_combine_sext_of_sextload +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: test_combine_sext_of_sextload + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: $w0 = COPY [[COPY2]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2) + %2:_(s32) = COPY %1:_(s32) + %3:_(s32) = G_SEXT_INREG %2:_(s32), 16 + $w0 = COPY %3(s32) +... +--- +name: test_combine_sext_of_sextload_not_matching +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + ; Here we're trying to extend from a larger width than was extended in the load. + ; CHECK-LABEL: name: test_combine_sext_of_sextload_not_matching + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) + ; CHECK: $w0 = COPY [[COPY1]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2) + %2:_(s32) = G_SEXT_INREG %1:_(s32), 24 + $w0 = COPY %2(s32) +... +--- +name: test_combine_sext_of_sextload_not_enough_src_signbits +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + ; Here we're trying to extend from a smaller width than was extended in the load. + ; Don't perform the combine. + ; CHECK-LABEL: name: test_combine_sext_of_sextload_not_enough_src_signbits + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 8 + ; CHECK: $w0 = COPY [[SEXT_INREG]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2) + %2:_(s32) = G_SEXT_INREG %1:_(s32), 8 + $w0 = COPY %2(s32) +...