diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1114,16 +1114,19 @@
                            (truncstore node:$val, node:$ptr)> {
   let IsStore = true;
   let MemoryVT = i8;
+  let IsTruncStore = true;
 }
 def truncstorei16 : PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr)> {
   let IsStore = true;
   let MemoryVT = i16;
+  let IsTruncStore = true;
 }
 def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr)> {
   let IsStore = true;
   let MemoryVT = i32;
+  let IsTruncStore = true;
 }
 def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr)> {
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -182,6 +182,14 @@
     [{ return lowerVectorFCMP(*${root}, MRI, B); }]),
   (apply [{}])>;
 
+def form_truncstore_matchdata : GIDefMatchData<"Register">;
+def form_truncstore : GICombineRule<
+  (defs root:$root, form_truncstore_matchdata:$matchinfo),
+  (match (wip_match_opcode G_STORE):$root,
+         [{ return matchFormTruncstore(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyFormTruncstore(*${root}, MRI, B, Observer, ${matchinfo}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -189,7 +197,7 @@
     : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
                        [shuffle_vector_lowering, vashr_vlshr_imm,
                         icmp_lowering, build_vector_lowering,
-                        lower_vector_fcmp]> {
+                        lower_vector_fcmp, form_truncstore]> {
   let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
 }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -306,11 +306,17 @@
   getActionDefinitionsBuilder(G_STORE)
       .legalForTypesWithMemDesc({{s8, p0, 8, 8},
+                                 {s16, p0, 8, 8},  // truncstorei8 from s16
+                                 {s32, p0, 8, 8},  // truncstorei8 from s32
+                                 {s64, p0, 8, 8},  // truncstorei8 from s64
                                  {s16, p0, 16, 8},
+                                 {s32, p0, 16, 8}, // truncstorei16 from s32
+                                 {s64, p0, 16, 8}, // truncstorei16 from s64
                                  {s32, p0, 8, 8},
                                  {s32, p0, 16, 8},
                                  {s32, p0, 32, 8},
                                  {s64, p0, 64, 8},
+                                 {s64, p0, 32, 8}, // truncstorei32 from s64
                                  {p0, p0, 64, 8},
                                  {s128, p0, 128, 8},
                                  {v16s8, p0, 128, 8},
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -951,6 +951,27 @@
   return false;
 }
 
+static bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                Register &SrcReg) {
+  assert(MI.getOpcode() == TargetOpcode::G_STORE);
+  Register DstReg = MI.getOperand(0).getReg();
+  if (MRI.getType(DstReg).isVector())
+    return false;
+  // Match a store of a truncate.
+  return mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg)));
+}
+
+static bool applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                MachineIRBuilder &B,
+                                GISelChangeObserver &Observer,
+                                Register &SrcReg) {
+  assert(MI.getOpcode() == TargetOpcode::G_STORE);
+  Observer.changingInstr(MI);
+  MI.getOperand(0).setReg(SrcReg);
+  Observer.changedInstr(MI);
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGILowering.inc"
 #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -888,22 +888,18 @@
 define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
 ; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_8:
 ; CHECK-NOLSE-O1:       ; %bb.0:
-; CHECK-NOLSE-O1-NEXT:    add x8, x0, w1, sxtw
-; CHECK-NOLSE-O1-NEXT:    sub x9, x0, #256 ; =256
-; CHECK-NOLSE-O1-NEXT:    add x10, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-NOLSE-O1-NEXT:    strb w2, [x0, #4095]
+; CHECK-NOLSE-O1-NEXT:    strb w2, [x0, w1, sxtw]
+; CHECK-NOLSE-O1-NEXT:    sturb w2, [x0, #-256]
 ; CHECK-NOLSE-O1-NEXT:    strb w2, [x8]
-; CHECK-NOLSE-O1-NEXT:    strb w2, [x9]
-; CHECK-NOLSE-O1-NEXT:    strb w2, [x10]
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_8:
 ; CHECK-NOLSE-O0:       ; %bb.0:
 ; CHECK-NOLSE-O0-NEXT:    strb w2, [x0, #4095]
-; CHECK-NOLSE-O0-NEXT:    add x8, x0, w1, sxtw
-; CHECK-NOLSE-O0-NEXT:    strb w2, [x8]
-; CHECK-NOLSE-O0-NEXT:    subs x8, x0, #256 ; =256
-; CHECK-NOLSE-O0-NEXT:    strb w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    strb w2, [x0, w1, sxtw]
+; CHECK-NOLSE-O0-NEXT:    sturb w2, [x0, #-256]
 ; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-NOLSE-O0-NEXT:    strb w2, [x8]
 ; CHECK-NOLSE-O0-NEXT:    ret
@@ -911,10 +907,8 @@
 ; CHECK-LSE-O1-LABEL: atomic_store_relaxed_8:
 ; CHECK-LSE-O1:       ; %bb.0:
 ; CHECK-LSE-O1-NEXT:    strb w2, [x0, #4095]
-; CHECK-LSE-O1-NEXT:    add x8, x0, w1, sxtw
-; CHECK-LSE-O1-NEXT:    strb w2, [x8]
-; CHECK-LSE-O1-NEXT:    sub x8, x0, #256 ; =256
-; CHECK-LSE-O1-NEXT:    strb w2, [x8]
+; CHECK-LSE-O1-NEXT:    strb w2, [x0, w1, sxtw]
+; CHECK-LSE-O1-NEXT:    sturb w2, [x0, #-256]
 ; CHECK-LSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O1-NEXT:    strb w2, [x8]
 ; CHECK-LSE-O1-NEXT:    ret
@@ -922,10 +916,8 @@
 ; CHECK-LSE-O0-LABEL: atomic_store_relaxed_8:
 ; CHECK-LSE-O0:       ; %bb.0:
 ; CHECK-LSE-O0-NEXT:    strb w2, [x0, #4095]
-; CHECK-LSE-O0-NEXT:    add x8, x0, w1, sxtw
-; CHECK-LSE-O0-NEXT:    strb w2, [x8]
-; CHECK-LSE-O0-NEXT:    subs x8, x0, #256 ; =256
-; CHECK-LSE-O0-NEXT:    strb w2, [x8]
+; CHECK-LSE-O0-NEXT:    strb w2, [x0, w1, sxtw]
+; CHECK-LSE-O0-NEXT:    sturb w2, [x0, #-256]
 ; CHECK-LSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O0-NEXT:    strb w2, [x8]
 ; CHECK-LSE-O0-NEXT:    ret
@@ -947,22 +939,18 @@
 define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
 ; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_16:
 ; CHECK-NOLSE-O1:       ; %bb.0:
-; CHECK-NOLSE-O1-NEXT:    add x8, x0, w1, sxtw #1
-; CHECK-NOLSE-O1-NEXT:    sub x9, x0, #256 ; =256
-; CHECK-NOLSE-O1-NEXT:    add x10, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-NOLSE-O1-NEXT:    strh w2, [x0, #8190]
+; CHECK-NOLSE-O1-NEXT:    strh w2, [x0, w1, sxtw #1]
+; CHECK-NOLSE-O1-NEXT:    sturh w2, [x0, #-256]
 ; CHECK-NOLSE-O1-NEXT:    strh w2, [x8]
-; CHECK-NOLSE-O1-NEXT:    strh w2, [x9]
-; CHECK-NOLSE-O1-NEXT:    strh w2, [x10]
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_16:
 ; CHECK-NOLSE-O0:       ; %bb.0:
 ; CHECK-NOLSE-O0-NEXT:    strh w2, [x0, #8190]
-; CHECK-NOLSE-O0-NEXT:    add x8, x0, w1, sxtw #1
-; CHECK-NOLSE-O0-NEXT:    strh w2, [x8]
-; CHECK-NOLSE-O0-NEXT:    subs x8, x0, #256 ; =256
-; CHECK-NOLSE-O0-NEXT:    strh w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    strh w2, [x0, w1, sxtw #1]
+; CHECK-NOLSE-O0-NEXT:    sturh w2, [x0, #-256]
 ; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-NOLSE-O0-NEXT:    strh w2, [x8]
 ; CHECK-NOLSE-O0-NEXT:    ret
@@ -970,10 +958,8 @@
 ; CHECK-LSE-O1-LABEL: atomic_store_relaxed_16:
 ; CHECK-LSE-O1:       ; %bb.0:
 ; CHECK-LSE-O1-NEXT:    strh w2, [x0, #8190]
-; CHECK-LSE-O1-NEXT:    add x8, x0, w1, sxtw #1
-; CHECK-LSE-O1-NEXT:    strh w2, [x8]
-; CHECK-LSE-O1-NEXT:    sub x8, x0, #256 ; =256
-; CHECK-LSE-O1-NEXT:    strh w2, [x8]
+; CHECK-LSE-O1-NEXT:    strh w2, [x0, w1, sxtw #1]
+; CHECK-LSE-O1-NEXT:    sturh w2, [x0, #-256]
 ; CHECK-LSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O1-NEXT:    strh w2, [x8]
 ; CHECK-LSE-O1-NEXT:    ret
@@ -981,10 +967,8 @@
 ; CHECK-LSE-O0-LABEL: atomic_store_relaxed_16:
 ; CHECK-LSE-O0:       ; %bb.0:
 ; CHECK-LSE-O0-NEXT:    strh w2, [x0, #8190]
-; CHECK-LSE-O0-NEXT:    add x8, x0, w1, sxtw #1
-; CHECK-LSE-O0-NEXT:    strh w2, [x8]
-; CHECK-LSE-O0-NEXT:    subs x8, x0, #256 ; =256
-; CHECK-LSE-O0-NEXT:    strh w2, [x8]
+; CHECK-LSE-O0-NEXT:    strh w2, [x0, w1, sxtw #1]
+; CHECK-LSE-O0-NEXT:    sturh w2, [x0, #-256]
 ; CHECK-LSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O0-NEXT:    strh w2, [x8]
 ; CHECK-LSE-O0-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -491,3 +491,29 @@
     %val:_(<4 x s64>) = G_LOAD %ptr(p0) :: (load 32)
     G_STORE %val(<4 x s64>), %ptr(p0) :: (store 32)
     RET_ReallyLR
+...
+---
+name: test_trunc_store
+body: |
+  bb.0:
+    liveins: $x0, $w1, $x2
+
+    ; CHECK-LABEL: name: test_trunc_store
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK: %val64:_(s64) = COPY $x2
+    ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 1)
+    ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 2)
+    ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 1)
+    ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 2)
+    ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 4)
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = COPY $w1
+    %2:_(s8) = G_TRUNC %1(s32)
+    %val64:_(s64) = COPY $x2
+    G_STORE %1(s32), %0(p0) :: (store 1)
+    G_STORE %1(s32), %0(p0) :: (store 2)
+    G_STORE %val64(s64), %0(p0) :: (store 1)
+    G_STORE %val64(s64), %0(p0) :: (store 2)
+    G_STORE %val64(s64), %0(p0) :: (store 4)
+...
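
[Editor's note, not part of the patch] The lowering combine above is the heart of the change: once truncating stores are legal, `G_STORE (G_TRUNC %x)` can be collapsed by storing the wide register directly and letting the narrow MachineMemOperand express the truncation. A minimal self-contained sketch of the same match/apply logic, using the real MIPatternMatch API but a hypothetical helper name:

```cpp
// Sketch only: mirrors matchFormTruncstore/applyFormTruncstore above.
// collapseTruncStore is an illustrative name, not patch code.
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;
using namespace llvm::MIPatternMatch;

// G_STORE (G_TRUNC %wide), %ptr :: (store N)
//   ==>  G_STORE %wide, %ptr :: (store N)
static void collapseTruncStore(MachineInstr &StoreMI,
                               MachineRegisterInfo &MRI) {
  Register Stored = StoreMI.getOperand(0).getReg();
  Register WideSrc;
  // mi_match resolves the vreg to its defining instruction, so this fires
  // exactly when the stored value is produced by a G_TRUNC.
  if (!mi_match(Stored, MRI, m_GTrunc(m_Reg(WideSrc))))
    return;
  // The MachineMemOperand keeps its narrow size; storing the wide register
  // through the old MMO is what makes this a truncating store.
  StoreMI.getOperand(0).setReg(WideSrc);
}
```

This is also why the new `legalForTypesWithMemDesc` entries are needed: after the rewrite, the stored value's type is wider than the memory size, so that form must already be legal.
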
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: truncstore_s8
+legalized: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $w1
+    ; CHECK-LABEL: name: truncstore_s8
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: %val:_(s32) = COPY $w1
+    ; CHECK: G_STORE %val(s32), %ptr(p0) :: (store 1)
+    %ptr:_(p0) = COPY $x0
+    %val:_(s32) = COPY $w1
+    %trunc:_(s8) = G_TRUNC %val
+    G_STORE %trunc(s8), %ptr(p0) :: (store 1)
+...
+---
+name: truncstore_vector
+legalized: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $q0
+    ; CHECK-LABEL: name: truncstore_vector
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: %val:_(<4 x s32>) = COPY $q0
+    ; CHECK: %trunc:_(<4 x s8>) = G_TRUNC %val(<4 x s32>)
+    ; CHECK: G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store 4)
+    %ptr:_(p0) = COPY $x0
+    %val:_(<4 x s32>) = COPY $q0
+    %trunc:_(<4 x s8>) = G_TRUNC %val
+    G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store 4)
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s
 
 --- |
   target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -43,6 +43,8 @@
   define void @store_adrp_add_low() { ret void }
   define void @store_adrp_add_low_foldable_offset() { ret void }
   define void @store_adrp_add_low_unfoldable_offset() { ret void }
+
+  define void @truncstores(i8* %addr) { ret void }
 ...
 
 ---
@@ -663,3 +665,50 @@
     %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 3
     %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 3
     G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
+...
+
+---
+name: truncstores
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $x0, $w1, $x2
+
+    ; CHECK-LABEL: name: truncstores
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: %val32:gpr32 = COPY $w1
+    ; CHECK: %val64:gpr64 = COPY $x2
+    ; CHECK: STRBBui %val32, [[COPY]], 0 :: (store 1)
+    ; CHECK: STRBBui %val32, [[COPY]], 43 :: (store 1)
+    ; CHECK: STRHHui %val32, [[COPY]], 0 :: (store 2)
+    ; CHECK: STURHHi %val32, [[COPY]], 43 :: (store 2)
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+    ; CHECK: STRHHui [[COPY1]], [[COPY]], 0 :: (store 2)
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+    ; CHECK: STURHHi [[COPY2]], [[COPY]], 43 :: (store 2)
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+    ; CHECK: STRWui [[COPY3]], [[COPY]], 0 :: (store 4)
+    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+    ; CHECK: STURWi [[COPY4]], [[COPY]], 43 :: (store 4)
+    %0:gpr(p0) = COPY $x0
+    %val32:gpr(s32) = COPY $w1
+    %val64:gpr(s64) = COPY $x2
+    G_STORE %val32, %0 :: (store 1)
+    ; unscaled offset:
+    %cst:gpr(s64) = G_CONSTANT i64 43
+    %newptr:gpr(p0) = G_PTR_ADD %0, %cst
+    G_STORE %val32, %newptr :: (store 1)
+
+    G_STORE %val32, %0 :: (store 2)
+    ; unscaled offset:
+    G_STORE %val32, %newptr :: (store 2)
+
+    G_STORE %val64, %0 :: (store 2)
+    ; unscaled offset:
+    G_STORE %val64, %newptr :: (store 2)
+
+    G_STORE %val64, %0 :: (store 4)
+    ; unscaled offset:
+    G_STORE %val64, %newptr :: (store 4)
+...
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -3657,6 +3657,10 @@
   Optional<const CodeGenRegisterClass *>
   inferRegClassFromPattern(TreePatternNode *N);
 
+  /// Return the size of the MemoryVT in this predicate, if possible.
+  Optional<unsigned>
+  getMemSizeBitsFromPredicate(const TreePredicateFn &Predicate);
+
   // Add builtin predicates.
   Expected<InstructionMatcher &>
   addBuiltinPredicates(const Record *SrcGIEquivOrNull,
@@ -3769,6 +3773,17 @@
   return Error::success();
 }
 
+Optional<unsigned> GlobalISelEmitter::getMemSizeBitsFromPredicate(
+    const TreePredicateFn &Predicate) {
+  Optional<LLTCodeGen> MemTyOrNone =
+      MVTToLLT(getValueType(Predicate.getMemoryVT()));
+
+  if (!MemTyOrNone)
+    return None;
+
+  // Align so unusual types like i1 don't get rounded down.
+  return llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8);
+}
+
 Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
     const Record *SrcGIEquivOrNull, const TreePredicateFn &Predicate,
     InstructionMatcher &InsnMatcher, bool &HasAddedMatcher) {
@@ -3808,9 +3823,18 @@
   if (Predicate.isStore()) {
     if (Predicate.isTruncStore()) {
-      // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
-      InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
-          0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+      if (Predicate.getMemoryVT() != nullptr) {
+        // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
+        auto MemSizeInBits = getMemSizeBitsFromPredicate(Predicate);
+        if (!MemSizeInBits)
+          return failedImport("MemVT could not be converted to LLT");
+
+        InsnMatcher.addPredicate<MemorySizePredicateMatcher>(0, *MemSizeInBits /
+                                                                    8);
+      } else {
+        InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+            0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+      }
       return InsnMatcher;
     }
     if (Predicate.isNonTruncStore()) {
@@ -3837,19 +3861,12 @@
 
   if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
     if (Predicate.getMemoryVT() != nullptr) {
-      Optional<LLTCodeGen> MemTyOrNone =
-          MVTToLLT(getValueType(Predicate.getMemoryVT()));
-
-      if (!MemTyOrNone)
+      auto MemSizeInBits = getMemSizeBitsFromPredicate(Predicate);
+      if (!MemSizeInBits)
         return failedImport("MemVT could not be converted to LLT");
 
-      // MMO's work in bytes so we must take care of unusual types like i1
-      // don't round down.
-      unsigned MemSizeInBits =
-          llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8);
-
       InsnMatcher.addPredicate<MemorySizePredicateMatcher>(0,
-                                                           MemSizeInBits / 8);
+                                                           *MemSizeInBits / 8);
       return InsnMatcher;
     }
   }
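
[Editor's note, not part of the patch] The emitter change means a `PatFrag` carrying both `IsTruncStore` and a concrete `MemoryVT` (e.g. `truncstorei16`) now yields an exact MMO-size check via `MemorySizePredicateMatcher`, while a bare `truncstore` keeps the relative `MemoryVsLLTSizePredicateMatcher::LessThan` check. A hand-written sketch of what the two generated predicates test at selection time (function names are hypothetical; a single memory operand is assumed):

```cpp
// Sketch only: hand-written equivalents of the two checks the generated
// match table performs; this is not emitter output.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// truncstorei16 (MemoryVT = i16): exact byte-size check, as produced by
// MemorySizePredicateMatcher (i1-style types are aligned up to whole bytes).
static bool checksLikeTruncStoreI16(const MachineInstr &MI) {
  const MachineMemOperand &MMO = **MI.memoperands_begin();
  return MMO.getSize() == 2;
}

// Bare truncstore (no MemoryVT): memory size strictly smaller than the
// stored value's LLT, as produced by MemoryVsLLTSizePredicateMatcher.
static bool checksLikeBareTruncStore(const MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) {
  const MachineMemOperand &MMO = **MI.memoperands_begin();
  LLT ValTy = MRI.getType(MI.getOperand(0).getReg());
  return MMO.getSizeInBits() < ValTy.getSizeInBits();
}
```

The FIXME retained in the patch records that when `MemoryVT` is set, this exact check currently coexists with the generic load/store size check later in `addBuiltinPredicates`, so the MMO size ends up being tested twice.
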