diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -658,3 +658,10 @@ const_combines, xor_of_and_with_same_reg, ptr_add_with_zero, shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine, div_rem_to_divrem, funnel_shift_combines]>; + +// A combine group used for prelegalizer combiners at -O0. The combines in +// this group have been selected based on experiments to balance code size and +// compile time performance. +def optnone_combines : GICombineGroup<[trivial_combines, + ptr_add_immed_chain, combines_for_extload, + not_cmp_fold, opt_brcond_by_inverting_cond]>; diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -59,7 +59,8 @@ InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &); -FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone); +FunctionPass *createAArch64O0PreLegalizerCombiner(); +FunctionPass *createAArch64PreLegalizerCombiner(); FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone); FunctionPass *createAArch64PostLegalizerLowering(); FunctionPass *createAArch64PostSelectOptimize(); @@ -82,6 +83,7 @@ void initializeAArch64LoadStoreOptPass(PassRegistry&); void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &); void initializeAArch64SIMDInstrOptPass(PassRegistry&); +void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &); void initializeAArch64PreLegalizerCombinerPass(PassRegistry&); void initializeAArch64PostLegalizerCombinerPass(PassRegistry &); void initializeAArch64PostLegalizerLoweringPass(PassRegistry &); diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td --- 
a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -43,6 +43,13 @@ let AdditionalArguments = []; } +def AArch64O0PreLegalizerCombinerHelper: GICombinerHelper< + "AArch64GenO0PreLegalizerCombinerHelper", [optnone_combines]> { + let DisableRuleOption = "aarch64O0prelegalizercombiner-disable-rule"; + let StateClass = "AArch64O0PreLegalizerCombinerHelperState"; + let AdditionalArguments = []; +} + // Matchdata for combines which replace a G_SHUFFLE_VECTOR with a // target-specific opcode. def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">; diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -184,6 +184,7 @@ initializeAArch64ExpandPseudoPass(*PR); initializeAArch64LoadStoreOptPass(*PR); initializeAArch64SIMDInstrOptPass(*PR); + initializeAArch64O0PreLegalizerCombinerPass(*PR); initializeAArch64PreLegalizerCombinerPass(*PR); initializeAArch64PostLegalizerCombinerPass(*PR); initializeAArch64PostLegalizerLoweringPass(*PR); @@ -562,8 +563,10 @@ } void AArch64PassConfig::addPreLegalizeMachineIR() { - bool IsOptNone = getOptLevel() == CodeGenOpt::None; - addPass(createAArch64PreLegalizerCombiner(IsOptNone)); + if (getOptLevel() == CodeGenOpt::None) + addPass(createAArch64O0PreLegalizerCombiner()); + else + addPass(createAArch64PreLegalizerCombiner()); } bool AArch64PassConfig::addLegalizeMachineIR() { diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -10,6 +10,8 @@ tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel) tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel) +tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner + 
-combiners="AArch64O0PreLegalizerCombinerHelper") tablegen(LLVM AArch64GenPreLegalizeGICombiner.inc -gen-global-isel-combiner -combiners="AArch64PreLegalizerCombinerHelper") tablegen(LLVM AArch64GenPostLegalizeGICombiner.inc -gen-global-isel-combiner @@ -32,6 +34,7 @@ GISel/AArch64GlobalISelUtils.cpp GISel/AArch64InstructionSelector.cpp GISel/AArch64LegalizerInfo.cpp + GISel/AArch64O0PreLegalizerCombiner.cpp GISel/AArch64PreLegalizerCombiner.cpp GISel/AArch64PostLegalizerCombiner.cpp GISel/AArch64PostLegalizerLowering.cpp diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h --- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h @@ -13,6 +13,7 @@ #define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H #include "llvm/ADT/Optional.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/Register.h" #include "MCTargetDesc/AArch64AddressingModes.h" @@ -44,6 +45,14 @@ bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred, const MachineRegisterInfo &MRI); +/// Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is +/// supported and beneficial to do so. +/// +/// \note This only applies on Darwin. +/// +/// \returns true if \p MI was replaced with a G_BZERO. 
+bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder, bool MinSize); + } // namespace AArch64GISelUtils } // namespace llvm diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp @@ -57,3 +57,38 @@ getConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI); return MaybeZero && MaybeZero->Value.getZExtValue() == 0; } + +bool AArch64GISelUtils::tryEmitBZero(MachineInstr &MI, + MachineIRBuilder &MIRBuilder, + bool MinSize) { + assert(MI.getOpcode() == TargetOpcode::G_MEMSET); + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + if (!TLI.getLibcallName(RTLIB::BZERO)) + return false; + auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI); + if (!Zero || Zero->Value.getSExtValue() != 0) + return false; + + // It's not faster to use bzero rather than memset for sizes <= 256. + // However, it *does* save us a mov from wzr, so if we're going for + // minsize, use bzero even if it's slower. + if (!MinSize) { + // If the size is known, check it. If it is not known, assume using bzero is + // better. 
+ if (auto Size = + getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) { + if (Size->Value.getSExtValue() <= 256) + return false; + } + } + + MIRBuilder.setInstrAndDebugLoc(MI); + MIRBuilder + .buildInstr(TargetOpcode::G_BZERO, {}, + {MI.getOperand(0), MI.getOperand(2)}) + .addImm(MI.getOperand(3).getImm()) + .addMemOperand(*MI.memoperands_begin()); + MI.eraseFromParent(); + return true; +} \ No newline at end of file diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp @@ -0,0 +1,171 @@ +//=== lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does combining of machine instructions at the generic MI level, +// before the legalizer. 
+// +//===----------------------------------------------------------------------===// + +#include "AArch64GlobalISelUtils.h" +#include "AArch64TargetMachine.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "aarch64-O0-prelegalizer-combiner" + +using namespace llvm; +using namespace MIPatternMatch; + +class AArch64O0PreLegalizerCombinerHelperState { +protected: + CombinerHelper &Helper; + +public: + AArch64O0PreLegalizerCombinerHelperState(CombinerHelper &Helper) + : Helper(Helper) {} +}; + +#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS +#include "AArch64GenO0PreLegalizeGICombiner.inc" +#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS + +namespace { +#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H +#include "AArch64GenO0PreLegalizeGICombiner.inc" +#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H + +class AArch64O0PreLegalizerCombinerInfo : public CombinerInfo { + GISelKnownBits *KB; + MachineDominatorTree *MDT; + AArch64GenO0PreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; + +public: + AArch64O0PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, + GISelKnownBits *KB, + MachineDominatorTree *MDT) + : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), + KB(KB), MDT(MDT) { + if (!GeneratedRuleCfg.parseCommandLineOption()) + 
report_fatal_error("Invalid rule identifier"); + } + + virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, + MachineIRBuilder &B) const override; +}; + +bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, + MachineInstr &MI, + MachineIRBuilder &B) const { + CombinerHelper Helper(Observer, B, KB, MDT); + AArch64GenO0PreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper); + + if (Generated.tryCombineAll(Observer, MI, B)) + return true; + + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case TargetOpcode::G_CONCAT_VECTORS: + return Helper.tryCombineConcatVectors(MI); + case TargetOpcode::G_SHUFFLE_VECTOR: + return Helper.tryCombineShuffleVector(MI); + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: { + // At -O0, set a maximum length of 32 for inlining. + unsigned MaxLen = 32; + // Always try to inline memcpy-family calls, limited by MaxLen. + if (Helper.tryCombineMemCpyFamily(MI, MaxLen)) + return true; + if (Opc == TargetOpcode::G_MEMSET) + return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize); + return false; + } + } + + return false; +} + +#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP +#include "AArch64GenO0PreLegalizeGICombiner.inc" +#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP + +// Pass boilerplate +// ================ + +class AArch64O0PreLegalizerCombiner : public MachineFunctionPass { +public: + static char ID; + + AArch64O0PreLegalizerCombiner(); + + StringRef getPassName() const override { + return "AArch64O0PreLegalizerCombiner"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; +} // end anonymous namespace + +void AArch64O0PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); + 
MachineFunctionPass::getAnalysisUsage(AU); +} + +AArch64O0PreLegalizerCombiner::AArch64O0PreLegalizerCombiner() + : MachineFunctionPass(ID) { + initializeAArch64O0PreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); +} + +bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + auto &TPC = getAnalysis(); + + const Function &F = MF.getFunction(); + GISelKnownBits *KB = &getAnalysis().get(MF); + AArch64O0PreLegalizerCombinerInfo PCInfo( + false, F.hasOptSize(), F.hasMinSize(), KB, nullptr /* MDT */); + Combiner C(PCInfo, &TPC); + return C.combineMachineInstrs(MF, nullptr /* CSEInfo */); +} + +char AArch64O0PreLegalizerCombiner::ID = 0; +INITIALIZE_PASS_BEGIN(AArch64O0PreLegalizerCombiner, DEBUG_TYPE, + "Combine AArch64 machine instrs before legalization", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) +INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) +INITIALIZE_PASS_END(AArch64O0PreLegalizerCombiner, DEBUG_TYPE, + "Combine AArch64 machine instrs before legalization", false, + false) + +namespace llvm { +FunctionPass *createAArch64O0PreLegalizerCombiner() { + return new AArch64O0PreLegalizerCombiner(); +} +} // end namespace llvm diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "AArch64GlobalISelUtils.h" #include "AArch64TargetMachine.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" @@ -219,46 +220,6 @@ return true; } -/// Replace a G_MEMSET with a value of 0 with a G_BZERO 
instruction if it is -/// supported and beneficial to do so. -/// -/// \note This only applies on Darwin. -/// -/// \returns true if \p MI was replaced with a G_BZERO. -static bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder, - bool MinSize) { - assert(MI.getOpcode() == TargetOpcode::G_MEMSET); - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); - auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); - if (!TLI.getLibcallName(RTLIB::BZERO)) - return false; - auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI); - if (!Zero || Zero->Value.getSExtValue() != 0) - return false; - - // It's not faster to use bzero rather than memset for sizes <= 256. - // However, it *does* save us a mov from wzr, so if we're going for - // minsize, use bzero even if it's slower. - if (!MinSize) { - // If the size is known, check it. If it is not known, assume using bzero is - // better. - if (auto Size = - getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) { - if (Size->Value.getSExtValue() <= 256) - return false; - } - } - - MIRBuilder.setInstrAndDebugLoc(MI); - MIRBuilder - .buildInstr(TargetOpcode::G_BZERO, {}, - {MI.getOperand(0), MI.getOperand(2)}) - .addImm(MI.getOperand(3).getImm()) - .addMemOperand(*MI.memoperands_begin()); - MI.eraseFromParent(); - return true; -} - class AArch64PreLegalizerCombinerHelperState { protected: CombinerHelper &Helper; @@ -321,7 +282,7 @@ if (!EnableMinSize && Helper.tryCombineMemCpyFamily(MI, MaxLen)) return true; if (Opc == TargetOpcode::G_MEMSET) - return tryEmitBZero(MI, B, EnableMinSize); + return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize); return false; } } @@ -340,15 +301,13 @@ public: static char ID; - AArch64PreLegalizerCombiner(bool IsOptNone = false); + AArch64PreLegalizerCombiner(); StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; } bool runOnMachineFunction(MachineFunction &MF) override; void 
getAnalysisUsage(AnalysisUsage &AU) const override; -private: - bool IsOptNone; }; } // end anonymous namespace @@ -358,17 +317,15 @@ getSelectionDAGFallbackAnalysisUsage(AU); AU.addRequired(); AU.addPreserved(); - if (!IsOptNone) { - AU.addRequired(); - AU.addPreserved(); - } + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } -AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner(bool IsOptNone) - : MachineFunctionPass(ID), IsOptNone(IsOptNone) { +AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner() + : MachineFunctionPass(ID) { initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); } @@ -387,8 +344,7 @@ bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); GISelKnownBits *KB = &getAnalysis().get(MF); - MachineDominatorTree *MDT = - IsOptNone ? nullptr : &getAnalysis(); + MachineDominatorTree *MDT = &getAnalysis(); AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), F.hasMinSize(), KB, MDT); Combiner C(PCInfo, &TPC); @@ -408,7 +364,7 @@ namespace llvm { -FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) { - return new AArch64PreLegalizerCombiner(IsOptNone); +FunctionPass *createAArch64PreLegalizerCombiner() { + return new AArch64PreLegalizerCombiner(); } } // end namespace llvm diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll @@ -56,9 +56,11 @@ ; VERIFY-NEXT: Verify generated machine code ; ENABLED-NEXT: Analysis for ComputingKnownBits ; ENABLED-O1-NEXT: MachineDominator Tree Construction -; ENABLED-NEXT: Analysis containing CSE Info -; ENABLED-NEXT: PreLegalizerCombiner +; ENABLED-O1-NEXT: Analysis containing CSE Info +; ENABLED-O1-NEXT: PreLegalizerCombiner +; 
VERIFY-O0-NEXT: AArch64O0PreLegalizerCombiner ; VERIFY-NEXT: Verify generated machine code +; VERIFY-O0-NEXT: Analysis containing CSE Info ; ENABLED-NEXT: Legalizer ; VERIFY-NEXT: Verify generated machine code ; ENABLED: RegBankSelect diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -34,8 +34,8 @@ ; CHECK-NEXT: Analysis containing CSE Info ; CHECK-NEXT: IRTranslator ; CHECK-NEXT: Analysis for ComputingKnownBits +; CHECK-NEXT: AArch64O0PreLegalizerCombiner ; CHECK-NEXT: Analysis containing CSE Info -; CHECK-NEXT: AArch64PreLegalizerCombiner ; CHECK-NEXT: Legalizer ; CHECK-NEXT: AArch64PostLegalizerLowering ; CHECK-NEXT: RegBankSelect diff --git a/llvm/test/CodeGen/AArch64/combine-loads.ll b/llvm/test/CodeGen/AArch64/combine-loads.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/combine-loads.ll +++ /dev/null @@ -1,21 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O0 -mtriple=aarch64-unknown-unknown | FileCheck %s - -define <2 x i64> @z(i64* nocapture nonnull readonly %p) { -; CHECK-LABEL: z: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x0, #8] -; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v2.16b -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ret - %b = load i64, i64* %p - %p2 = getelementptr i64, i64* %p, i64 1 - %bb = load i64, i64* %p2 - %r1 = insertelement <2 x i64> zeroinitializer, i64 %b, i32 0 - %r2 = insertelement <2 x i64> %r1, i64 %bb, i32 1 - ret <2 x i64> %r2 -}