Index: llvm/lib/Target/AArch64/AArch64.td =================================================================== --- llvm/lib/Target/AArch64/AArch64.td +++ llvm/lib/Target/AArch64/AArch64.td @@ -188,14 +188,18 @@ "fuse-aes", "HasFuseAES", "true", "CPU fuses AES crypto operations">; -def FeatureFuseCryptoEOR : SubtargetFeature< - "fuse-crypto-eor", "HasFuseCryptoEOR", "true", - "CPU fuses AES/PMULL and EOR operations">; +def FeatureFuseArithmeticLogic : SubtargetFeature< + "fuse-arith-logic", "HasFuseArithmeticLogic", "true", + "CPU fuses arithmetic and logic operations">; def FeatureFuseCCSelect : SubtargetFeature< "fuse-csel", "HasFuseCCSelect", "true", "CPU fuses conditional select operations">; +def FeatureFuseCryptoEOR : SubtargetFeature< + "fuse-crypto-eor", "HasFuseCryptoEOR", "true", + "CPU fuses AES/PMULL and EOR operations">; + def FeatureFuseLiterals : SubtargetFeature< "fuse-literals", "HasFuseLiterals", "true", "CPU fuses literal generation operations">; Index: llvm/lib/Target/AArch64/AArch64MacroFusion.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -270,7 +270,73 @@ return false; } -/// Check if the instr pair, FirstMI and SecondMI, should be fused +// Arithmetic and logic. 
+// Returns true for the opcode pairs below, provided hasShiftedReg() is false
+// for both instructions. A null FirstMI matches any eligible SecondMI.
+static bool isArithmeticLogicPair(const MachineInstr *FirstMI,
+                                  const MachineInstr &SecondMI) {
+  if (AArch64InstrInfo::hasShiftedReg(SecondMI))
+    return false;
+  // NOTE(review): only *rs opcodes are listed; should *rr forms match too?
+  switch (SecondMI.getOpcode()) {
+  // Arithmetic not setting flags.
+  case AArch64::ADDWrs:
+  case AArch64::ADDXrs:
+  case AArch64::SUBWrs:
+  case AArch64::SUBXrs:
+  // Logic
+  case AArch64::ANDWrs:
+  case AArch64::ANDXrs:
+  case AArch64::BICWrs:
+  case AArch64::BICXrs:
+  case AArch64::EONWrs:
+  case AArch64::EONXrs:
+  case AArch64::EORWrs:
+  case AArch64::EORXrs:
+  case AArch64::ORNWrs:
+  case AArch64::ORNXrs:
+  case AArch64::ORRWrs:
+  case AArch64::ORRXrs:
+    // Assume the 1st instr to be a wildcard if it is unspecified.
+    if (FirstMI == nullptr)
+      return true;
+    switch (FirstMI->getOpcode()) {
+    // Arithmetic, whether or not it sets flags.
+    case AArch64::ADDWrs:
+    case AArch64::ADDXrs:
+    case AArch64::ADDSWrs:
+    case AArch64::ADDSXrs:
+    case AArch64::SUBWrs:
+    case AArch64::SUBXrs:
+    case AArch64::SUBSWrs:
+    case AArch64::SUBSXrs:
+      return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
+    }
+    break; // FirstMI is not an eligible opcode: fall out to `return false`.
+
+  // Arithmetic setting flags.
+  case AArch64::ADDSWrs:
+  case AArch64::ADDSXrs:
+  case AArch64::SUBSWrs:
+  case AArch64::SUBSXrs:
+    // Assume the 1st instr to be a wildcard if it is unspecified.
+    if (FirstMI == nullptr)
+      return true;
+    switch (FirstMI->getOpcode()) {
+    // Arithmetic not setting flags.
+    case AArch64::ADDWrs:
+    case AArch64::ADDXrs:
+    case AArch64::SUBWrs:
+    case AArch64::SUBXrs:
+      return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
+    }
+    break; // FirstMI is not an eligible opcode: fall out to `return false`.
+  }
+
+  return false;
+}
+
+/// Check if the instr pair, FirstMI and SecondMI, should be fused
 /// together. Given SecondMI, when FirstMI is unspecified, then check if
 /// SecondMI may be part of a fused pair at all.
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, @@ -295,6 +361,8 @@ return true; if (ST.hasFuseCCSelect() && isCCSelectPair(FirstMI, SecondMI)) return true; + if (ST.hasFuseArithmeticLogic() && isArithmeticLogicPair(FirstMI, SecondMI)) + return true; return false; } Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -166,8 +166,9 @@ bool HasArithmeticCbzFusion = false; bool HasFuseAddress = false; bool HasFuseAES = false; - bool HasFuseCryptoEOR = false; + bool HasFuseArithmeticLogic = false; bool HasFuseCCSelect = false; + bool HasFuseCryptoEOR = false; bool HasFuseLiterals = false; bool DisableLatencySchedHeuristic = false; bool UseRSqrt = false; @@ -311,14 +312,16 @@ bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } bool hasFuseAddress() const { return HasFuseAddress; } bool hasFuseAES() const { return HasFuseAES; } - bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; } + bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; } bool hasFuseCCSelect() const { return HasFuseCCSelect; } + bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; } bool hasFuseLiterals() const { return HasFuseLiterals; } /// Return true if the CPU supports any kind of instruction fusion. 
bool hasFusion() const { return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || - hasFuseAES() || hasFuseCCSelect() || hasFuseLiterals(); + hasFuseAES() || hasFuseArithmeticLogic() || + hasFuseCCSelect() || hasFuseLiterals(); } bool useRSqrt() const { return UseRSqrt; } Index: llvm/test/CodeGen/AArch64/misched-fusion-arith-logic.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/misched-fusion-arith-logic.ll @@ -0,0 +1,58 @@ +; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-arith-logic | FileCheck %s + +define i32 @arith(i32 %a0, i32 %a1, i32 %a2) { +entry: + %u0 = sub i32 %a0, %a1 + %u1 = add i32 %a1, %a2 + %v0 = sub i32 %u0, %a2 + %v1 = add i32 %u1, %a0 + %w0 = mul i32 %v0, %v1 + ret i32 %w0 + +; CHECK-LABEL: arith: +; CHECK: sub [[R:w[0-9]+]], {{w[0-9]+}}, {{w[0-9]+}} +; CHECK-NEXT: sub [[R]], [[R]], {{w[0-9]+}} +; CHECK: add [[S:w[0-9]+]], {{w[0-9]+}}, {{w[0-9]+}} +; CHECK-NEXT: add [[S]], [[S]], {{w[0-9]+}} +} + +define i64 @compare(i64 %a0, i64 %a1, i64 %a2) { +entry: + %u0 = add i64 %a0, %a1 + %u1 = sub i64 %a1, %a0 + %v0 = add i64 %u0, %a2 + %f1 = icmp eq i64 %u1, %a0 + br i1 %f1, label %if, label %exit + +if: + ret i64 %v0 + +exit: + ret i64 %u1 + +; CHECK-LABEL: compare: +; CHECK: add [[R:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}} +; CHECK-NEXT: add [[R]], [[R]], {{x[0-9]+}} +; CHECK: sub [[S:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}} +; CHECK-NEXT: cmp [[S]], {{x[0-9]+}} +} + +define i64 @arithno(i32 %a0, i32 %a1, i32 %a2) { +entry: + %b0 = zext i32 %a0 to i64 + %b1 = zext i32 %a2 to i64 + %t0 = sub i32 %a0, %a1 + %t1 = add i32 %a1, %a2 + %u0 = zext i32 %t0 to i64 + %u1 = zext i32 %t1 to i64 + %v0 = sub i64 %u0, %b0 + %v1 = add i64 %u1, %b1 + %w0 = mul i64 %v0, %v1 + ret i64 %w0 + +; CHECK-LABEL: arithno: +; CHECK: sub w[[R:[0-9]+]], {{w[0-9]+}}, {{w[0-9]+}} +; CHECK-NEXT: add w[[S:[0-9]+]], {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sub {{x[0-9]+}}, x[[R]], {{w[0-9]+}}, uxtw +; CHECK-NEXT: add 
{{x[0-9]+}}, x[[S]], {{w[0-9]+}}, uxtw +}