Index: llvm/lib/Target/AArch64/AArch64Combine.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64Combine.td
+++ llvm/lib/Target/AArch64/AArch64Combine.td
@@ -119,13 +119,21 @@
   (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
+// Rewrite a G_STORE of a floating-point positive 0.0 so that it stores an
+// integer 0 instead (see matchFPStoreWithZeroImm / applyFPStoreWithZeroImm).
+def fp_store_with_zero_imm : GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_STORE):$root,
+          [{ return matchFPStoreWithZeroImm(*${root}, MRI); }]),
+  (apply [{ applyFPStoreWithZeroImm(*${root}, B, Observer); }])>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
 def AArch64PostLegalizerLoweringHelper
     : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
                        [shuffle_vector_pseudos, vashr_vlshr_imm,
-                        icmp_lowering, form_duplane]> {
+                        icmp_lowering, form_duplane, fp_store_with_zero_imm]> {
   let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
 }
 
Index: llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -610,6 +610,43 @@
   return true;
 }
 
+/// Returns true if \p I is a G_STORE of a non-vector, 32-bit or 64-bit value
+/// which is a floating-point constant equal to positive 0.0.
+static bool matchFPStoreWithZeroImm(MachineInstr &I, MachineRegisterInfo &MRI) {
+  assert(I.getOpcode() == TargetOpcode::G_STORE);
+  const Register StoredReg = I.getOperand(0).getReg();
+  const LLT StoredTy = MRI.getType(StoredReg);
+  // Vector types are rejected.
+  if (StoredTy.isVector())
+    return false;
+  // Only 32-bit and 64-bit values are handled.
+  const unsigned NumBits = StoredTy.getSizeInBits();
+  if (!(NumBits == 32 || NumBits == 64))
+    return false;
+  // The stored register must resolve to an FP constant, and that constant
+  // must be positive zero.
+  const auto *FPCst = getConstantFPVRegVal(StoredReg, MRI);
+  return FPCst && FPCst->isNullValue();
+}
+
+/// Rewrites a G_STORE accepted by matchFPStoreWithZeroImm so that it stores an
+/// integer 0 (a newly built G_CONSTANT) instead of the positive 0.0.
+static bool applyFPStoreWithZeroImm(MachineInstr &I, MachineIRBuilder &MIB,
+                                    GISelChangeObserver &Observer) {
+  // New instructions are inserted at the store, reusing its debug location.
+  MIB.setInstrAndDebugLoc(I);
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  // Clone the stored vreg to get the destination of the integer constant.
+  const Register OldSrc = I.getOperand(0).getReg();
+  const Register NewSrc = MRI.cloneVirtualRegister(OldSrc);
+  auto IntZero = MIB.buildConstant(NewSrc, 0);
+  // Swap in the new constant, telling the observer before and after.
+  Observer.changingInstr(I);
+  I.getOperand(0).setReg(IntZero->getOperand(0).getReg());
+  Observer.changedInstr(I);
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGILowering.inc"
 #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
Index: llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zero-store.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zero-store.mir
@@ -0,0 +1,88 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test that a G_STORE of floating-point +0.0 is rewritten to store a G_CONSTANT 0, so that a zero register can be used.
+
+...
+---
+name:            s32
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: s32
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: G_STORE [[C]](s32), %ptr(p0) :: (store 4)
+    ; CHECK: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %zero:_(s32) = G_FCONSTANT float 0.000000e+00
+    G_STORE %zero(s32), %ptr(p0) :: (store 4)
+    RET_ReallyLR
+
+...
+---
+name:            s64
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: s64
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: G_STORE [[C]](s64), %ptr(p0) :: (store 8)
+    ; CHECK: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %zero:_(s64) = G_FCONSTANT double 0.000000e+00
+    G_STORE %zero(s64), %ptr(p0) :: (store 8)
+    RET_ReallyLR
+
+...
+---
+name:            s16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; The s16 store is left unchanged. We should be able to handle this with full FP16, but right now, we don't.
+
+    ; CHECK-LABEL: name: s16
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: %zero:_(s16) = G_FCONSTANT half 0xH0000
+    ; CHECK: G_STORE %zero(s16), %ptr(p0) :: (store 2)
+    ; CHECK: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %zero:_(s16) = G_FCONSTANT half 0.000000e+00
+    G_STORE %zero(s16), %ptr(p0) :: (store 2)
+    RET_ReallyLR
+
+...
+---
+name:            negative_zero
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; Only positive zero should be handled here; a store of -0.0 must be left untouched.
+
+    ; CHECK-LABEL: name: negative_zero
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: %zero:_(s32) = G_FCONSTANT float -0.000000e+00
+    ; CHECK: G_STORE %zero(s32), %ptr(p0) :: (store 4)
+    ; CHECK: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %zero:_(s32) = G_FCONSTANT float -0.000000e+00
+    G_STORE %zero(s32), %ptr(p0) :: (store 4)
+    RET_ReallyLR