Index: llvm/trunk/lib/Target/AArch64/AArch64.td
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.td
+++ llvm/trunk/lib/Target/AArch64/AArch64.td
@@ -107,6 +107,10 @@
     "fuse-aes", "HasFuseAES", "true",
     "CPU fuses AES crypto operations">;
 
+def FeatureFuseLiterals : SubtargetFeature<
+    "fuse-literals", "HasFuseLiterals", "true",
+    "CPU fuses literal generation operations">;
+
 def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
     "Disable latency scheduling heuristic">;
@@ -189,6 +193,7 @@
                                      FeatureCustomCheapAsMoveHandling,
                                      FeatureFPARMv8,
                                      FeatureFuseAES,
+                                     FeatureFuseLiterals,
                                      FeatureNEON,
                                      FeaturePerfMon,
                                      FeaturePostRAScheduler,
Index: llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -129,6 +129,31 @@
              SecondOpcode == AArch64::INSTRUCTION_LIST_END;
     }
 
+  if (ST.hasFuseLiterals())
+    // Fuse literal generation operations.
+    switch (FirstOpcode) {
+    // PC relative address.
+    case AArch64::ADRP:
+      return SecondOpcode == AArch64::ADDXri ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    // 32 bit immediate.
+    case AArch64::MOVZWi:
+      return (SecondOpcode == AArch64::MOVKWi &&
+              Second->getOperand(3).getImm() == 16) ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    // Lower half of 64 bit immediate.
+    case AArch64::MOVZXi:
+      return (SecondOpcode == AArch64::MOVKXi &&
+              Second->getOperand(3).getImm() == 16) ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    // Upper half of 64 bit immediate.
+    case AArch64::MOVKXi:
+      return First->getOperand(3).getImm() == 32 &&
+             ((SecondOpcode == AArch64::MOVKXi &&
+               Second->getOperand(3).getImm() == 48) ||
+              SecondOpcode == AArch64::INSTRUCTION_LIST_END);
+    }
+
   return false;
 }
 
Index: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
@@ -85,6 +85,7 @@
   bool HasArithmeticBccFusion = false;
   bool HasArithmeticCbzFusion = false;
   bool HasFuseAES = false;
+  bool HasFuseLiterals = false;
   bool DisableLatencySchedHeuristic = false;
   bool UseRSqrt = false;
   uint8_t MaxInterleaveFactor = 2;
@@ -199,6 +200,7 @@
   bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
   bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
   bool hasFuseAES() const { return HasFuseAES; }
+  bool hasFuseLiterals() const { return HasFuseLiterals; }
   bool useRSqrt() const { return UseRSqrt; }
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
   unsigned getVectorInsertExtractBaseCost() const {
Index: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -330,6 +330,20 @@
     return DAG;
   }
 
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override {
+    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+    if (ST.hasFuseLiterals()) {
+      // Run the Macro Fusion after RA again since literals are expanded from
+      // pseudos then (v. addPreSched2()).
+      ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+      DAG->addMutation(createAArch64MacroFusionDAGMutation());
+      return DAG;
+    }
+
+    return nullptr;
+  }
+
   void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;
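For reference, a sketch of the literal-generation sequences that the switch above would keep adjacent; the symbol name, registers, and immediate values are only illustrative, not taken from the patch:

        // ADRP + ADDXri: PC-relative address of a symbol.
        adrp    x0, some_symbol
        add     x0, x0, :lo12:some_symbol

        // MOVZWi + MOVKWi (LSL #16): 32-bit immediate 0x12345678.
        movz    w1, #0x5678
        movk    w1, #0x1234, lsl #16

        // MOVZXi + MOVKXi (LSL #16), then MOVKXi (LSL #32) + MOVKXi (LSL #48):
        // a 64-bit immediate built as two fusible pairs.
        movz    x2, #0xdef0
        movk    x2, #0x9abc, lsl #16
        movk    x2, #0x5678, lsl #32
        movk    x2, #0x1234, lsl #48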