Index: llvm/lib/Target/AArch64/AArch64.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64.td
+++ llvm/lib/Target/AArch64/AArch64.td
@@ -119,6 +119,10 @@
     "fuse-aes", "HasFuseAES", "true",
     "CPU fuses AES crypto operations">;
 
+def FeatureFuseCCSelect : SubtargetFeature<
+    "fuse-csel", "HasFuseCCSelect", "true",
+    "CPU fuses conditional select operations">;
+
 def FeatureFuseLiterals : SubtargetFeature<
     "fuse-literals", "HasFuseLiterals", "true",
     "CPU fuses literal generation operations">;
@@ -337,6 +341,7 @@
                                     FeatureCustomCheapAsMoveHandling,
                                     FeatureFPARMv8,
                                     FeatureFuseAES,
+                                    FeatureFuseCCSelect,
                                     FeatureFuseLiterals,
                                     FeatureNEON,
                                     FeaturePerfMon,
Index: llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -40,8 +40,8 @@
     // Fuse CMN, CMP, TST followed by Bcc.
     if (SecondOpcode == AArch64::Bcc)
       switch (FirstOpcode) {
-      default:
-        return false;
+      case AArch64::INSTRUCTION_LIST_END:
+        return true;
       case AArch64::ADDSWri:
       case AArch64::ADDSWrr:
       case AArch64::ADDSXri:
@@ -66,9 +66,9 @@
       case AArch64::BICSWrs:
       case AArch64::BICSXrs:
         // Shift value can be 0 making these behave like the "rr" variant...
-        return !II.hasShiftedReg(*FirstMI);
-      case AArch64::INSTRUCTION_LIST_END:
-        return true;
+        if (!II.hasShiftedReg(*FirstMI))
+          return true;
+        break;
       }
 
   if (ST.hasArithmeticCbzFusion())
@@ -76,8 +76,8 @@
     if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
         SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX)
       switch (FirstOpcode) {
-      default:
-        return false;
+      case AArch64::INSTRUCTION_LIST_END:
+        return true;
      case AArch64::ADDWri:
      case AArch64::ADDWrr:
      case AArch64::ADDXri:
@@ -108,46 +108,96 @@
      case AArch64::BICWrs:
      case AArch64::BICXrs:
        // Shift value can be 0 making these behave like the "rr" variant...
-       return !II.hasShiftedReg(*FirstMI);
-     case AArch64::INSTRUCTION_LIST_END:
-       return true;
+       if (!II.hasShiftedReg(*FirstMI))
+         return true;
+       break;
      }
 
-  if (ST.hasFuseAES())
+  if (ST.hasFuseAES()) {
     // Fuse AES crypto operations.
-    switch(SecondOpcode) {
     // AES encode.
-    case AArch64::AESMCrr:
-    case AArch64::AESMCrrTied:
-      return FirstOpcode == AArch64::AESErr ||
-             FirstOpcode == AArch64::INSTRUCTION_LIST_END;
+    if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
+         FirstOpcode == AArch64::AESErr) &&
+        (SecondOpcode == AArch64::AESMCrr ||
+         SecondOpcode == AArch64::AESMCrrTied))
+      return true;
     // AES decode.
-    case AArch64::AESIMCrr:
-    case AArch64::AESIMCrrTied:
-      return FirstOpcode == AArch64::AESDrr ||
-             FirstOpcode == AArch64::INSTRUCTION_LIST_END;
-    }
+    else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
+              FirstOpcode == AArch64::AESDrr) &&
+             (SecondOpcode == AArch64::AESIMCrr ||
+              SecondOpcode == AArch64::AESIMCrrTied))
+      return true;
+  }
 
-  if (ST.hasFuseLiterals())
+  if (ST.hasFuseLiterals()) {
     // Fuse literal generation operations.
-    switch (SecondOpcode) {
     // PC relative address.
-    case AArch64::ADDXri:
-      return FirstOpcode == AArch64::ADRP ||
-             FirstOpcode == AArch64::INSTRUCTION_LIST_END;
+    if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
+         FirstOpcode == AArch64::ADRP) &&
+        SecondOpcode == AArch64::ADDXri)
+      return true;
     // 32 bit immediate.
-    case AArch64::MOVKWi:
-      return (FirstOpcode == AArch64::MOVZWi &&
-              SecondMI.getOperand(3).getImm() == 16) ||
-             FirstOpcode == AArch64::INSTRUCTION_LIST_END;
-    // Lower and upper half of 64 bit immediate.
-    case AArch64::MOVKXi:
-      return FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
-             (FirstOpcode == AArch64::MOVZXi &&
-              SecondMI.getOperand(3).getImm() == 16) ||
-             (FirstOpcode == AArch64::MOVKXi &&
-              FirstMI->getOperand(3).getImm() == 32 &&
-              SecondMI.getOperand(3).getImm() == 48);
+    else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
+              FirstOpcode == AArch64::MOVZWi) &&
+             (SecondOpcode == AArch64::MOVKWi &&
+              SecondMI.getOperand(3).getImm() == 16))
+      return true;
+    // Lower half of 64 bit immediate.
+    else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
+              FirstOpcode == AArch64::MOVZXi) &&
+             (SecondOpcode == AArch64::MOVKXi &&
+              SecondMI.getOperand(3).getImm() == 16))
+      return true;
+    // Upper half of 64 bit immediate.
+    else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
+              (FirstOpcode == AArch64::MOVKXi &&
+               FirstMI->getOperand(3).getImm() == 32)) &&
+             (SecondOpcode == AArch64::MOVKXi &&
+              SecondMI.getOperand(3).getImm() == 48))
+      return true;
+  }
+
+  if (ST.hasFuseCCSelect()) {
+    // Fuse conditional select operations.
+    if (SecondOpcode == AArch64::CSELWr)
+      // 32 bits
+      switch (FirstOpcode) {
+      case AArch64::INSTRUCTION_LIST_END:
+        return true;
+      case AArch64::SUBSWrs:
+        if (II.hasShiftedReg(*FirstMI))
+          break;
+        // Fall through.
+      case AArch64::SUBSWrx:
+        if (II.hasExtendedReg(*FirstMI))
+          break;
+        // Fall through.
+      case AArch64::SUBSWrr:
+      case AArch64::SUBSWri:
+        if (FirstMI->definesRegister(AArch64::WZR))
+          return true;
+        break;
+      }
+    else if (SecondOpcode == AArch64::CSELXr)
+      // 64 bits
+      switch (FirstOpcode) {
+      case AArch64::INSTRUCTION_LIST_END:
+        return true;
+      case AArch64::SUBSXrs:
+        if (II.hasShiftedReg(*FirstMI))
+          break;
+        // Fall through.
+      case AArch64::SUBSXrx:
+      case AArch64::SUBSXrx64:
+        if (II.hasExtendedReg(*FirstMI))
+          break;
+        // Fall through.
+      case AArch64::SUBSXrr:
+      case AArch64::SUBSXri:
+        if (FirstMI->definesRegister(AArch64::XZR))
+          return true;
+        break;
+      }
   }
 
   return false;
Index: llvm/lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -110,6 +110,7 @@
   bool HasArithmeticBccFusion = false;
   bool HasArithmeticCbzFusion = false;
   bool HasFuseAES = false;
+  bool HasFuseCCSelect = false;
   bool HasFuseLiterals = false;
   bool DisableLatencySchedHeuristic = false;
   bool UseRSqrt = false;
@@ -234,6 +235,7 @@
   bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
   bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
   bool hasFuseAES() const { return HasFuseAES; }
+  bool hasFuseCCSelect() const { return HasFuseCCSelect; }
   bool hasFuseLiterals() const { return HasFuseLiterals; }
 
   /// \brief Return true if the CPU supports any kind of instruction fusion.
Index: llvm/test/CodeGen/AArch64/misched-fusion-csel.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/misched-fusion-csel.ll
@@ -0,0 +1,30 @@
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-csel | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s
+
+target triple = "aarch64-unknown"
+
+define i32 @test_sub_cselw(i32 %a0, i32 %a1, i32 %a2) {
+entry:
+  %v0 = sub i32 %a0, 13
+  %cond = icmp eq i32 %v0, 0
+  %v1 = add i32 %a1, 7
+  %v2 = select i1 %cond, i32 %a0, i32 %v1
+  ret i32 %v2
+
+; CHECK-LABEL: test_sub_cselw:
+; CHECK: cmp {{w[0-9]}}, #13
+; CHECK-NEXT: csel {{w[0-9]}}
+}
+
+define i64 @test_sub_cselx(i64 %a0, i64 %a1, i64 %a2) {
+entry:
+  %v0 = sub i64 %a0, 13
+  %cond = icmp eq i64 %v0, 0
+  %v1 = add i64 %a1, 7
+  %v2 = select i1 %cond, i64 %a0, i64 %v1
+  ret i64 %v2
+
+; CHECK-LABEL: test_sub_cselx:
+; CHECK: cmp {{x[0-9]}}, #13
+; CHECK-NEXT: csel {{x[0-9]}}
+}
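
Note (not part of the patch): for readers who want to trace the new hasFuseCCSelect() path without the surrounding LLVM machinery, here is a minimal standalone sketch of the pairing rule it implements. Every name in it (the enum, struct, and function) is a hypothetical stand-in for illustration, not LLVM API; it models only the immediate and register-register SUBS forms and omits the shifted/extended variants and the INSTRUCTION_LIST_END wildcard that the patch also handles.

// Illustrative model only: a flag-setting subtract that discards its result
// (i.e. a CMP alias writing WZR/XZR), followed by a conditional select of the
// same register width, may be kept adjacent for fusion.
#include <cassert>

enum Opcode { SUBSWri, SUBSWrr, SUBSXri, SUBSXrr, CSELWr, CSELXr, Other };

// Summary of the first instruction of a candidate pair: its opcode and
// whether its destination is the zero register, so its only live effect is
// setting the NZCV flags.
struct FirstInstr {
  Opcode Opc;
  bool WritesZeroReg;
};

static bool shouldFuseCmpCsel(FirstInstr First, Opcode Second) {
  if (Second == CSELWr)
    return (First.Opc == SUBSWri || First.Opc == SUBSWrr) &&
           First.WritesZeroReg;
  if (Second == CSELXr)
    return (First.Opc == SUBSXri || First.Opc == SUBSXrr) &&
           First.WritesZeroReg;
  return false;
}

int main() {
  // cmp w0, #13 (SUBSWri wzr, w0, #13) followed by a 32-bit csel: fused.
  assert(shouldFuseCmpCsel({SUBSWri, true}, CSELWr));
  // subs w1, w0, #13 keeps its result, so it is not a pure compare: not fused.
  assert(!shouldFuseCmpCsel({SUBSWri, false}, CSELWr));
  // Width mismatch between the compare and the select: not fused.
  assert(!shouldFuseCmpCsel({SUBSWri, true}, CSELXr));
  return 0;
}

The tests above exercise exactly the first case: the subtract of 13 only feeds the compare, so it is selected as a SUBS writing the zero register and the scheduler keeps it adjacent to the csel.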