Index: llvm/trunk/lib/Target/ARM/ARM.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARM.td
+++ llvm/trunk/lib/Target/ARM/ARM.td
@@ -141,6 +141,10 @@
 def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
                                       "CPU fuses AES crypto operations">;
 
+// Fast execution of bottom and top halves of literal generation
+def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true",
+                                           "CPU fuses literal generation operations">;
+
 // The way of reading thread pointer
 def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true",
                                      "Reading thread pointer from register">;
Index: llvm/trunk/lib/Target/ARM/ARMMacroFusion.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMMacroFusion.cpp
+++ llvm/trunk/lib/Target/ARM/ARMMacroFusion.cpp
@@ -19,6 +19,47 @@
 
 namespace llvm {
 
+// Fuse AES crypto encoding or decoding.
+static bool isAESPair(const MachineInstr *FirstMI,
+                      const MachineInstr &SecondMI) {
+  // Assume the 1st instr to be a wildcard if it is unspecified.
+  unsigned FirstOpcode =
+      FirstMI ? FirstMI->getOpcode()
+              : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
+  unsigned SecondOpcode = SecondMI.getOpcode();
+
+  switch(SecondOpcode) {
+  // AES encode.
+  case ARM::AESMC :
+    return FirstOpcode == ARM::AESE ||
+           FirstOpcode == ARM::INSTRUCTION_LIST_END;
+  // AES decode.
+  case ARM::AESIMC:
+    return FirstOpcode == ARM::AESD ||
+           FirstOpcode == ARM::INSTRUCTION_LIST_END;
+  }
+
+  return false;
+}
+
+// Fuse literal generation.
+static bool isLiteralsPair(const MachineInstr *FirstMI,
+                           const MachineInstr &SecondMI) {
+  // Assume the 1st instr to be a wildcard if it is unspecified.
+  unsigned FirstOpcode =
+      FirstMI ? FirstMI->getOpcode()
+              : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
+  unsigned SecondOpcode = SecondMI.getOpcode();
+
+  // 32 bit immediate.
+  if ((FirstOpcode == ARM::INSTRUCTION_LIST_END ||
+       FirstOpcode == ARM::MOVi16) &&
+      SecondOpcode == ARM::MOVTi16)
+    return true;
+
+  return false;
+}
+
 /// Check if the instr pair, FirstMI and SecondMI, should be fused
 /// together. Given SecondMI, when FirstMI is unspecified, then check if
 /// SecondMI may be part of a fused pair at all.
@@ -28,24 +69,10 @@
                                    const MachineInstr &SecondMI) {
   const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI);
 
-  // Assume wildcards for unspecified instrs.
-  unsigned FirstOpcode =
-    FirstMI ? FirstMI->getOpcode()
-            : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
-  unsigned SecondOpcode = SecondMI.getOpcode();
-
-  if (ST.hasFuseAES())
-    // Fuse AES crypto operations.
-    switch(SecondOpcode) {
-    // AES encode.
-    case ARM::AESMC :
-      return FirstOpcode == ARM::AESE ||
-             FirstOpcode == ARM::INSTRUCTION_LIST_END;
-    // AES decode.
-    case ARM::AESIMC:
-      return FirstOpcode == ARM::AESD ||
-             FirstOpcode == ARM::INSTRUCTION_LIST_END;
-    }
+  if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
+    return true;
+  if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
+    return true;
 
   return false;
 }
Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.h
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h
@@ -327,6 +327,10 @@
   /// pairs faster.
   bool HasFuseAES = false;
 
+  /// HasFuseLiterals - if true, processor executes back to back
+  /// bottom and top halves of literal generation faster.
+  bool HasFuseLiterals = false;
+
   /// If true, if conversion may decide to leave some instructions unpredicated.
   bool IsProfitableToUnpredicate = false;
 
@@ -616,8 +620,9 @@
   bool hasFullFP16() const { return HasFullFP16; }
 
   bool hasFuseAES() const { return HasFuseAES; }
+  bool hasFuseLiterals() const { return HasFuseLiterals; }
   /// Return true if the CPU supports any kind of instruction fusion.
-  bool hasFusion() const { return hasFuseAES(); }
+  bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
 
   const Triple &getTargetTriple() const { return TargetTriple; }
 
Index: llvm/trunk/test/CodeGen/ARM/misched-fusion-lit.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/misched-fusion-lit.ll
+++ llvm/trunk/test/CodeGen/ARM/misched-fusion-lit.ll
@@ -0,0 +1,39 @@
+; RUN: llc %s -o - -mtriple=armv8-unknown -mattr=-fuse-literals,+use-misched | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
+; RUN: llc %s -o - -mtriple=armv8-unknown -mattr=+fuse-literals,+use-misched | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
+
+@g = common global i32* zeroinitializer
+
+define i32* @litp(i32 %a, i32 %b) {
+entry:
+  %add = add nsw i32 %b, %a
+  %ptr = getelementptr i32, i32* bitcast (i32* (i32, i32)* @litp to i32*), i32 %add
+  %res = getelementptr i32, i32* bitcast (i32** @g to i32*), i32 %add
+  store i32* %ptr, i32** @g, align 4
+  ret i32* %res
+
+; CHECK-LABEL: litp:
+; CHECK: movw [[R:r[0-9]+]], :lower16:litp
+; CHECKDONT-NEXT: movw [[S:r[0-9]+]], :lower16:g
+; CHECKFUSE-NEXT: movt [[R]], :upper16:litp
+; CHECKFUSE-NEXT: movw [[S:r[0-9]+]], :lower16:g
+; CHECKFUSE-NEXT: movt [[S]], :upper16:g
+}
+
+define i32 @liti(i32 %a, i32 %b) {
+entry:
+  %adda = add i32 %a, -262095121
+  %add1 = add i32 %adda, %b
+  %addb = add i32 %b, 121110837
+  %add2 = add i32 %addb, %a
+  store i32 %add1, i32* bitcast (i32** @g to i32*), align 4
+  ret i32 %add2
+
+; CHECK-LABEL: liti:
+; CHECK: movw [[R:r[0-9]+]], #309
+; CHECKDONT-NEXT: add {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}
+; CHECKFUSE-NEXT: movt [[R]], #1848
+; CHECKFUSE: movw [[S:r[0-9]+]], :lower16:g
+; CHECKFUSE-NEXT: movt [[S]], :upper16:g
+; CHECKFUSE-NEXT: movw [[T:r[0-9]+]], #48879
+; CHECKFUSE-NEXT: movt [[T]], #61536
+}