Index: llvm/lib/Target/AArch64/AArch64.td =================================================================== --- llvm/lib/Target/AArch64/AArch64.td +++ llvm/lib/Target/AArch64/AArch64.td @@ -220,14 +220,9 @@ "enable-select-opt", "EnableSelectOptimize", "true", "Enable the select optimize pass for select loop heuristics">; -def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", - "HasCustomCheapAsMoveHandling", "true", - "Use custom handling of cheap instructions">; - def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", "HasExynosCheapAsMoveHandling", "true", - "Use Exynos specific handling of cheap instructions", - [FeatureCustomCheapAsMoveHandling]>; + "Use Exynos specific handling of cheap instructions">; def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", "UsePostRAScheduler", "true", "Schedule again after register allocation">; @@ -758,7 +753,6 @@ FeatureFuseAES, FeatureFuseAdrpAdd, FeatureBalanceFPOps, - FeatureCustomCheapAsMoveHandling, FeaturePostRAScheduler]>; def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", @@ -779,7 +773,6 @@ "Cortex-A57 ARM processors", [ FeatureFuseAES, FeatureBalanceFPOps, - FeatureCustomCheapAsMoveHandling, FeatureFuseAdrpAdd, FeatureFuseLiterals, FeaturePostRAScheduler, @@ -1063,7 +1056,6 @@ def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", "Qualcomm Kryo processors", [ - FeatureCustomCheapAsMoveHandling, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureZCZeroing, @@ -1072,7 +1064,6 @@ def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", "Qualcomm Falkor processors", [ - FeatureCustomCheapAsMoveHandling, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureZCZeroing, @@ -1132,7 +1123,6 @@ def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", "Qualcomm Saphira processors", [ - FeatureCustomCheapAsMoveHandling, FeaturePostRAScheduler, 
FeaturePredictableSelectIsExpensive, FeatureZCZeroing, @@ -1180,7 +1170,6 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", "HiSilicon TS-V110 processors", [ - FeatureCustomCheapAsMoveHandling, FeatureFuseAES, FeaturePostRAScheduler]>; Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -798,85 +798,64 @@ return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding); } -// FIXME: this implementation should be micro-architecture dependent, so a -// micro-architecture target hook should be introduced here in future. -bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { - if (!Subtarget.hasCustomCheapAsMoveHandling()) - return MI.isAsCheapAsAMove(); - - const unsigned Opcode = MI.getOpcode(); +// Return true if Imm is suitable as an immediate operand to MOVZWi or MOVZXi. +static bool isMOVZImm(uint64_t Imm, unsigned BitSize) { + if (BitSize == 32) + return Imm <= 0xffff || ((Imm & 0xffff) == 0 && (Imm >> 16) <= 0xffff); + + assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed"); + // Shift 0 + if (Imm <= 0xffff) + return true; + if ((Imm & 0xffff) != 0) + return false; - // Firstly, check cases gated by features. 
+ // Shift 16 + Imm >>= 16; + if (Imm <= 0xffff) + return true; + if ((Imm & 0xffff) != 0) + return false; - if (Subtarget.hasZeroCycleZeroingFP()) { - if (Opcode == AArch64::FMOVH0 || - Opcode == AArch64::FMOVS0 || - Opcode == AArch64::FMOVD0) - return true; - } + // Shift 32 + Imm >>= 16; + if (Imm <= 0xffff) + return true; + if ((Imm & 0xffff) != 0) + return false; - if (Subtarget.hasZeroCycleZeroingGP()) { - if (Opcode == TargetOpcode::COPY && - (MI.getOperand(1).getReg() == AArch64::WZR || - MI.getOperand(1).getReg() == AArch64::XZR)) - return true; - } + // Shift 48 + Imm >>= 16; + return Imm <= 0xffff; +} - // Secondly, check cases specific to sub-targets. +/// Returns true if a MOVi32imm or MOVi64imm can be expanded to MOVZ or MOVN. +static bool canBeExpandedToMOVZN(const MachineInstr &MI, unsigned BitSize) { + uint64_t Mask = ~uint64_t(0) >> (64 - BitSize); + uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm()); + return isMOVZImm(Imm & Mask, BitSize) || isMOVZImm(~Imm & Mask, BitSize); +} +// FIXME: this implementation should be micro-architecture dependent, so a +// micro-architecture target hook should be introduced here in future. +bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { if (Subtarget.hasExynosCheapAsMoveHandling()) { if (isExynosCheapAsMove(MI)) return true; - return MI.isAsCheapAsAMove(); } - // Finally, check generic cases. 
- - switch (Opcode) { + switch (MI.getOpcode()) { default: - return false; - - // add/sub on register without shift - case AArch64::ADDWri: - case AArch64::ADDXri: - case AArch64::SUBWri: - case AArch64::SUBXri: - return (MI.getOperand(3).getImm() == 0); - - // logical ops on immediate - case AArch64::ANDWri: - case AArch64::ANDXri: - case AArch64::EORWri: - case AArch64::EORXri: - case AArch64::ORRWri: - case AArch64::ORRXri: - return true; - - // logical ops on register without shift - case AArch64::ANDWrr: - case AArch64::ANDXrr: - case AArch64::BICWrr: - case AArch64::BICXrr: - case AArch64::EONWrr: - case AArch64::EONXrr: - case AArch64::EORWrr: - case AArch64::EORXrr: - case AArch64::ORNWrr: - case AArch64::ORNXrr: - case AArch64::ORRWrr: - case AArch64::ORRXrr: - return true; - + return MI.isAsCheapAsAMove(); // If MOVi32imm or MOVi64imm can be expanded into ORRWri or - // ORRXri, it is as cheap as MOV + // ORRXri, it is as cheap as MOV. + // Likewise if it can be expanded to MOVN/MOVZ. case AArch64::MOVi32imm: - return canBeExpandedToORR(MI, 32); + return canBeExpandedToMOVZN(MI, 32) || canBeExpandedToORR(MI, 32); case AArch64::MOVi64imm: - return canBeExpandedToORR(MI, 64); + return canBeExpandedToMOVZN(MI, 64) || canBeExpandedToORR(MI, 64); } - - llvm_unreachable("Unknown opcode to check as cheap as a move!"); } bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {