Add a generic G_BZERO opcode to GlobalISel and use it on AArch64.

Summary of the changes in this patch:
  * TargetOpcodes.def / GenericOpcodes.td: declare G_BZERO, which takes a
    destination pointer, a size and a tail-call immediate and may store.
  * LegalizerHelper.cpp: lower G_BZERO through the RTLIB::BZERO libcall and
    return UnableToLegalize (without erasing the instruction) when the target
    provides no name for the requested libcall.
  * MachineVerifier.cpp: check G_BZERO with the same memory-operand and
    pointer rules as G_MEMSET, reporting errors under the name "bzero".
  * AArch64LegalizerInfo.cpp: mark G_BZERO as a libcall.
  * AArch64PreLegalizerCombiner.cpp: rewrite a G_MEMSET of constant zero into
    G_BZERO when a bzero libcall name is available and either minsize is
    requested, the size is unknown, or the constant size is greater than 256.
  * Tests: legalizer, pre-legalizer combiner and machine verifier coverage.

diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -731,6 +731,7 @@
 
 /// llvm.memset intrinsic
 HANDLE_TARGET_OPCODE(G_MEMSET)
+HANDLE_TARGET_OPCODE(G_BZERO)
 
 /// Vector reductions
 HANDLE_TARGET_OPCODE(G_VECREDUCE_SEQ_FADD)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1354,6 +1354,13 @@
   let mayStore = true;
 }
 
+def G_BZERO : GenericInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins ptype0:$dst_addr, type1:$size, untyped_imm_0:$tailcall);
+  let hasSideEffects = false;
+  let mayStore = true;
+}
+
 //------------------------------------------------------------------------------
 // Bitfield extraction.
 //------------------------------------------------------------------------------
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -582,7 +582,11 @@
   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
   RTLIB::Libcall RTLibcall;
-  switch (MI.getOpcode()) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case TargetOpcode::G_BZERO:
+    RTLibcall = RTLIB::BZERO;
+    break;
   case TargetOpcode::G_MEMCPY:
     RTLibcall = RTLIB::MEMCPY;
     break;
@@ -597,6 +601,13 @@
   }
   const char *Name = TLI.getLibcallName(RTLibcall);
 
+  // Unsupported libcall on the target.
+  if (!Name) {
+    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
+                      << MIRBuilder.getTII().getName(Opc) << "\n");
+    return LegalizerHelper::UnableToLegalize;
+  }
+
   CallLowering::CallLoweringInfo Info;
   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
   Info.Callee = MachineOperand::CreateES(Name);
@@ -748,10 +759,14 @@
       return Status;
     break;
   }
+  case TargetOpcode::G_BZERO:
   case TargetOpcode::G_MEMCPY:
   case TargetOpcode::G_MEMMOVE:
   case TargetOpcode::G_MEMSET: {
-    LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
+    LegalizeResult Result =
+        createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
+    if (Result != Legalized)
+      return Result;
     MI.eraseFromParent();
     return Result;
   }
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1509,26 +1509,28 @@
     break;
   }
 
+  case TargetOpcode::G_BZERO:
   case TargetOpcode::G_MEMSET: {
     ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
+    std::string Name = Opc == TargetOpcode::G_MEMSET ? "memset" : "bzero";
     if (MMOs.size() != 1) {
-      report("memset must have 1 memory operand", MI);
+      report(Twine(Name, " must have 1 memory operand"), MI);
       break;
     }
 
     if ((!MMOs[0]->isStore() || MMOs[0]->isLoad())) {
-      report("memset memory operand must be a store", MI);
+      report(Twine(Name, " memory operand must be a store"), MI);
       break;
     }
 
     LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg());
     if (!DstPtrTy.isPointer()) {
-      report("memset operand must be a pointer", MI);
+      report(Twine(Name, " operand must be a pointer"), MI);
       break;
     }
 
     if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
-      report("inconsistent memset address space", MI);
+      report("inconsistent " + Twine(Name, " address space"), MI);
 
     break;
   }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -682,7 +682,8 @@
 
   getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
 
-  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
+  getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
+      .libcall();
 
   getActionDefinitionsBuilder(G_ABS).lowerIf(
       [=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); });
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -217,6 +217,46 @@
   return true;
 }
 
+/// Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is
+/// supported and beneficial to do so.
+///
+/// \note This only applies on Darwin.
+///
+/// \returns true if \p MI was replaced with a G_BZERO.
+static bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
+                         bool MinSize) {
+  assert(MI.getOpcode() == TargetOpcode::G_MEMSET);
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+  if (!TLI.getLibcallName(RTLIB::BZERO))
+    return false;
+  auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
+  if (!Zero || Zero->Value.getSExtValue() != 0)
+    return false;
+
+  // It's not faster to use bzero rather than memset for sizes <= 256.
+  // However, it *does* save us a mov from wzr, so if we're going for
+  // minsize, use bzero even if it's slower.
+  if (!MinSize) {
+    // If the size is known, check it. If it is not known, assume using bzero is
+    // better.
+    if (auto Size =
+            getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
+      if (Size->Value.getSExtValue() <= 256)
+        return false;
+    }
+  }
+
+  MIRBuilder.setInstrAndDebugLoc(MI);
+  MIRBuilder
+      .buildInstr(TargetOpcode::G_BZERO, {},
+                  {MI.getOperand(0), MI.getOperand(2)})
+      .addImm(MI.getOperand(3).getImm())
+      .addMemOperand(*MI.memoperands_begin());
+  MI.eraseFromParent();
+  return true;
+}
+
 class AArch64PreLegalizerCombinerHelperState {
 protected:
   CombinerHelper &Helper;
@@ -263,7 +303,8 @@
   if (Generated.tryCombineAll(Observer, MI, B))
     return true;
 
-  switch (MI.getOpcode()) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
   case TargetOpcode::G_CONCAT_VECTORS:
     return Helper.tryCombineConcatVectors(MI);
   case TargetOpcode::G_SHUFFLE_VECTOR:
@@ -275,7 +316,11 @@
     // heuristics decide.
     unsigned MaxLen = EnableOpt ? 0 : 32;
     // Try to inline memcpy type calls if optimizations are enabled.
-    return !EnableMinSize ? Helper.tryCombineMemCpyFamily(MI, MaxLen) : false;
+    if (!EnableMinSize && Helper.tryCombineMemCpyFamily(MI, MaxLen))
+      return true;
+    if (Opc == TargetOpcode::G_MEMSET)
+      return tryEmitBZero(MI, B, EnableMinSize);
+    return false;
   }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero-unsupported.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero-unsupported.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero-unsupported.mir
@@ -0,0 +1,14 @@
+# RUN: not llc -mtriple=aarch64 -global-isel-abort=1 -run-pass=legalizer -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s
+# RUN: not llc -mtriple=aarch64-linux-gnu -global-isel-abort=1 -run-pass=legalizer -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s
+...
+---
+name: bzero
+tracksRegLiveness: true
+body: |
+  bb.0:
+    # CHECK: LLVM ERROR: unable to legalize instruction: G_BZERO
+    liveins: $x0, $x1
+    %ptr:_(p0) = COPY $x0
+    %width:_(s64) = COPY $x1
+    G_BZERO %ptr(p0), %width(s64), 0 :: (store 4)
+    RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir
@@ -0,0 +1,45 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-ios -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we can legalize G_BZERO on Darwin.
+
+...
+---
+name: bzero
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: bzero
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: %width:_(s64) = COPY $x1
+    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY %ptr(p0)
+    ; CHECK: $x1 = COPY %width(s64)
+    ; CHECK: BL &bzero, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1
+    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %width:_(s64) = COPY $x1
+    G_BZERO %ptr(p0), %width(s64), 0 :: (store 4)
+    RET_ReallyLR
+
+...
+---
+name: bzero_tail_call
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: bzero_tail_call
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: %width:_(s64) = COPY $x1
+    ; CHECK: $x0 = COPY %ptr(p0)
+    ; CHECK: $x1 = COPY %width(s64)
+    ; CHECK: TCRETURNdi &bzero, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x0, implicit $x1
+    %ptr:_(p0) = COPY $x0
+    %width:_(s64) = COPY $x1
+    G_BZERO %ptr(p0), %width(s64), 1 :: (store 4)
+    RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -619,6 +619,7 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_MEMCPY (opcode {{[0-9]+}}): 3 type indices, 1 imm index
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_MEMMOVE (opcode {{[0-9]+}}): 3 type indices, 1 imm index
@@ -629,6 +630,9 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_BZERO (opcode {{[0-9]+}}): 2 type indices, 1 imm index
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_VECREDUCE_SEQ_FADD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir
@@ -0,0 +1,147 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=DARWIN
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=UNKNOWN
+#
+# Check that on Darwin we can combine to G_BZERO. Without Darwin, this should
+# stay as memset.
+
+--- |
+  define void @bzero_unknown_width() { unreachable }
+  define void @bzero_tail_unknown_width() { unreachable }
+  define void @bzero_constant_width() { unreachable }
+  define void @bzero_constant_width_minsize() minsize { unreachable }
+  define void @not_zero() minsize { unreachable }
+...
+---
+name: bzero_unknown_width
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; Always use G_BZERO when the memset width is unknown on Darwin.
+
+    ; DARWIN-LABEL: name: bzero_unknown_width
+    ; DARWIN: liveins: $x0, $x1
+    ; DARWIN: %ptr:_(p0) = COPY $x0
+    ; DARWIN: %width:_(s64) = COPY $x1
+    ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store 4)
+    ; DARWIN: RET_ReallyLR
+    ; UNKNOWN-LABEL: name: bzero_unknown_width
+    ; UNKNOWN: liveins: $x0, $x1
+    ; UNKNOWN: %ptr:_(p0) = COPY $x0
+    ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0
+    ; UNKNOWN: %width:_(s64) = COPY $x1
+    ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4)
+    ; UNKNOWN: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %zero:_(s8) = G_CONSTANT i8 0
+    %width:_(s64) = COPY $x1
+    G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4)
+    RET_ReallyLR
+...
+---
+name: bzero_tail_unknown_width
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; DARWIN-LABEL: name: bzero_tail_unknown_width
+    ; DARWIN: liveins: $x0, $x1
+    ; DARWIN: %ptr:_(p0) = COPY $x0
+    ; DARWIN: %width:_(s64) = COPY $x1
+    ; DARWIN: G_BZERO %ptr(p0), %width(s64), 1 :: (store 4)
+    ; DARWIN: RET_ReallyLR
+    ; UNKNOWN-LABEL: name: bzero_tail_unknown_width
+    ; UNKNOWN: liveins: $x0, $x1
+    ; UNKNOWN: %ptr:_(p0) = COPY $x0
+    ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0
+    ; UNKNOWN: %width:_(s64) = COPY $x1
+    ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store 4)
+    ; UNKNOWN: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %zero:_(s8) = G_CONSTANT i8 0
+    %width:_(s64) = COPY $x1
+    G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store 4)
+    RET_ReallyLR
+...
+---
+name: bzero_constant_width
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; For values >256, we should use G_BZERO on Darwin.
+
+    ; DARWIN-LABEL: name: bzero_constant_width
+    ; DARWIN: liveins: $x0, $x1
+    ; DARWIN: %ptr:_(p0) = COPY $x0
+    ; DARWIN: %width:_(s64) = G_CONSTANT i64 1024
+    ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store 4)
+    ; DARWIN: RET_ReallyLR
+    ; UNKNOWN-LABEL: name: bzero_constant_width
+    ; UNKNOWN: liveins: $x0, $x1
+    ; UNKNOWN: %ptr:_(p0) = COPY $x0
+    ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0
+    ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 1024
+    ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4)
+    ; UNKNOWN: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %zero:_(s8) = G_CONSTANT i8 0
+    %width:_(s64) = G_CONSTANT i64 1024
+    G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4)
+    RET_ReallyLR
+...
+---
+name: bzero_constant_width_minsize
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; With minsize, we should always use G_BZERO to avoid a copy from wzr.
+
+    ; DARWIN-LABEL: name: bzero_constant_width_minsize
+    ; DARWIN: liveins: $x0, $x1
+    ; DARWIN: %ptr:_(p0) = COPY $x0
+    ; DARWIN: %width:_(s64) = G_CONSTANT i64 256
+    ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store 4)
+    ; DARWIN: RET_ReallyLR
+    ; UNKNOWN-LABEL: name: bzero_constant_width_minsize
+    ; UNKNOWN: liveins: $x0, $x1
+    ; UNKNOWN: %ptr:_(p0) = COPY $x0
+    ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0
+    ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 256
+    ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4)
+    ; UNKNOWN: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %zero:_(s8) = G_CONSTANT i8 0
+    %width:_(s64) = G_CONSTANT i64 256
+    G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4)
+    RET_ReallyLR
+...
+---
+name: not_zero
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; When the value isn't 0, don't create G_BZERO.
+
+    ; DARWIN-LABEL: name: not_zero
+    ; DARWIN: liveins: $x0, $x1
+    ; DARWIN: %ptr:_(p0) = COPY $x0
+    ; DARWIN: %not_zero:_(s8) = G_CONSTANT i8 1
+    ; DARWIN: %width:_(s64) = G_CONSTANT i64 256
+    ; DARWIN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store 4)
+    ; DARWIN: RET_ReallyLR
+    ; UNKNOWN-LABEL: name: not_zero
+    ; UNKNOWN: liveins: $x0, $x1
+    ; UNKNOWN: %ptr:_(p0) = COPY $x0
+    ; UNKNOWN: %not_zero:_(s8) = G_CONSTANT i8 1
+    ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 256
+    ; UNKNOWN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store 4)
+    ; UNKNOWN: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %not_zero:_(s8) = G_CONSTANT i8 1
+    %width:_(s64) = G_CONSTANT i64 256
+    G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store 4)
+    RET_ReallyLR
diff --git a/llvm/test/MachineVerifier/test_g_bzero.mir b/llvm/test/MachineVerifier/test_g_bzero.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/MachineVerifier/test_g_bzero.mir
@@ -0,0 +1,33 @@
+#RUN: not --crash llc -o - -march=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
+# REQUIRES: aarch64-registered-target
+---
+name: test_bzero
+legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+body: |
+  bb.0:
+
+    %ptr:_(p0) = G_IMPLICIT_DEF
+    %cst1:_(s64) = G_CONSTANT i64 4
+    %cst2:_(s8) = G_CONSTANT i8 7
+
+    ; CHECK: *** Bad machine code: bzero must have 1 memory operand ***
+    G_BZERO %ptr, %cst2, 0
+
+    ; CHECK: *** Bad machine code: bzero memory operand must be a store ***
+    G_BZERO %ptr, %cst2, 0 :: (load 4)
+
+    ; CHECK: *** Bad machine code: Missing mayLoad flag ***
+    ; CHECK: *** Bad machine code: bzero memory operand must be a store ***
+    G_BZERO %ptr, %cst2, 0 :: (load store 4)
+
+    ; CHECK: *** Bad machine code: inconsistent bzero address space ***
+    G_BZERO %ptr, %cst2, 0 :: (store 4, addrspace 1)
+
+    ; CHECK: *** Bad machine code: bzero operand must be a pointer ***
+    G_BZERO %cst1, %cst2, 0 :: (store 4)
+
+...