Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5797,23 +5797,19 @@
   case MachineCombinerPattern::MULADDXI_OP1: {
     // MUL I=A,B,0
    // ADD R,I,Imm
-    // ==> ORR V, ZR, Imm
+    // ==> MOV V, Imm
     // ==> MADD R,A,B,V
     // --- Create(MADD);
     const TargetRegisterClass *OrrRC;
-    unsigned BitSize, OrrOpc, ZeroReg;
+    unsigned BitSize;
     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
-      OrrOpc = AArch64::ORRWri;
       OrrRC = &AArch64::GPR32spRegClass;
       BitSize = 32;
-      ZeroReg = AArch64::WZR;
       Opc = AArch64::MADDWrrr;
       RC = &AArch64::GPR32RegClass;
     } else {
-      OrrOpc = AArch64::ORRXri;
       OrrRC = &AArch64::GPR64spRegClass;
       BitSize = 64;
-      ZeroReg = AArch64::XZR;
       Opc = AArch64::MADDXrrr;
       RC = &AArch64::GPR64RegClass;
     }
@@ -5825,13 +5821,16 @@
       Imm = Imm << Val;
     }
     uint64_t UImm = SignExtend64(Imm, BitSize);
-    uint64_t Encoding;
-    if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+    // Check that the immediate can be composed via a single instruction.
+    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+    AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
+    if (Insn.size() != 1)
       return;
+    auto MovI = Insn.begin();
     MachineInstrBuilder MIB1 =
-        BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
-            .addReg(ZeroReg)
-            .addImm(Encoding);
+        BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR)
+            .addImm(MovI->Op1)
+            .addImm(MovI->Op2);
     InsInstrs.push_back(MIB1);
     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
@@ -5889,23 +5888,19 @@
   case MachineCombinerPattern::MULSUBXI_OP1: {
     // MUL I=A,B,0
     // SUB R,I, Imm
-    // ==> ORR V, ZR, -Imm
+    // ==> MOV V, -Imm
     // ==> MADD R,A,B,V // = -Imm + A*B
     // --- Create(MADD);
     const TargetRegisterClass *OrrRC;
-    unsigned BitSize, OrrOpc, ZeroReg;
+    unsigned BitSize;
     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
-      OrrOpc = AArch64::ORRWri;
       OrrRC = &AArch64::GPR32spRegClass;
       BitSize = 32;
-      ZeroReg = AArch64::WZR;
       Opc = AArch64::MADDWrrr;
       RC = &AArch64::GPR32RegClass;
     } else {
-      OrrOpc = AArch64::ORRXri;
       OrrRC = &AArch64::GPR64spRegClass;
       BitSize = 64;
-      ZeroReg = AArch64::XZR;
       Opc = AArch64::MADDXrrr;
       RC = &AArch64::GPR64RegClass;
     }
@@ -5916,13 +5911,16 @@
       Imm = Imm << Val;
     }
     uint64_t UImm = SignExtend64(-Imm, BitSize);
-    uint64_t Encoding;
-    if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+    // Check that the immediate can be composed via a single instruction.
+    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+    AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
+    if (Insn.size() != 1)
       return;
+    auto MovI = Insn.begin();
     MachineInstrBuilder MIB1 =
-        BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
-            .addReg(ZeroReg)
-            .addImm(Encoding);
+        BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR)
+            .addImm(MovI->Op1)
+            .addImm(MovI->Op2);
     InsInstrs.push_back(MIB1);
     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
Index: llvm/test/CodeGen/AArch64/addimm-mulimm.ll
===================================================================
--- llvm/test/CodeGen/AArch64/addimm-mulimm.ll
+++ llvm/test/CodeGen/AArch64/addimm-mulimm.ll
@@ -5,8 +5,8 @@
 ; CHECK-LABEL: addimm_mulimm_accept_00:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #37
-; CHECK-NEXT:    mul x8, x0, x8
-; CHECK-NEXT:    add x0, x8, #1147
+; CHECK-NEXT:    mov x9, #1147
+; CHECK-NEXT:    madd x0, x0, x8, x9
 ; CHECK-NEXT:    ret
   %tmp0 = add i64 %a, 31
   %tmp1 = mul i64 %tmp0, 37
@@ -17,8 +17,8 @@
 ; CHECK-LABEL: addimm_mulimm_accept_01:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #37
-; CHECK-NEXT:    mul x8, x0, x8
-; CHECK-NEXT:    sub x0, x8, #1147
+; CHECK-NEXT:    mov x9, #-1147
+; CHECK-NEXT:    madd x0, x0, x8, x9
 ; CHECK-NEXT:    ret
   %tmp0 = add i64 %a, -31
   %tmp1 = mul i64 %tmp0, 37
@@ -29,8 +29,8 @@
 ; CHECK-LABEL: addimm_mulimm_accept_02:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #37
-; CHECK-NEXT:    mul w8, w0, w8
-; CHECK-NEXT:    add w0, w8, #1147
+; CHECK-NEXT:    mov w9, #1147
+; CHECK-NEXT:    madd w0, w0, w8, w9
 ; CHECK-NEXT:    ret
   %tmp0 = add i32 %a, 31
   %tmp1 = mul i32 %tmp0, 37
@@ -41,8 +41,8 @@
 ; CHECK-LABEL: addimm_mulimm_accept_03:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #37
-; CHECK-NEXT:    mul w8, w0, w8
-; CHECK-NEXT:    sub w0, w8, #1147
+; CHECK-NEXT:    mov w9, #-1147
+; CHECK-NEXT:    madd w0, w0, w8, w9
 ; CHECK-NEXT:    ret
   %tmp0 = add i32 %a, -31
   %tmp1 = mul i32 %tmp0, 37
Index: llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
===================================================================
--- llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
+++ llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-arm-none-eabi -stop-after=machine-outliner < %s | FileCheck %s -check-prefix=TARGET_FEATURES
 
@@ -13,7 +14,7 @@
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    orr w8, wzr, #0x1
+; CHECK-NEXT:    mov w8, #1
 ; CHECK-NEXT:    madd w19, w0, w0, w8
 ; CHECK-NEXT:    mov w0, #4
 ; CHECK-NEXT:    bl __cxa_allocate_exception
@@ -37,7 +38,7 @@
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    and w8, w0, #0xff
 ; CHECK-NEXT:    mov w0, #4
-; CHECK-NEXT:    orr w9, wzr, #0x1
+; CHECK-NEXT:    mov w9, #1
 ; CHECK-NEXT:    madd w19, w8, w8, w9
 ; CHECK-NEXT:    bl __cxa_allocate_exception
 ; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
Index: llvm/test/CodeGen/AArch64/madd-combiner.ll
===================================================================
--- llvm/test/CodeGen/AArch64/madd-combiner.ll
+++ llvm/test/CodeGen/AArch64/madd-combiner.ll
@@ -6,7 +6,7 @@
 define i32 @mul_add_imm(i32 %a, i32 %b) {
 ; CHECK-LABEL: mul_add_imm:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    orr w8, wzr, #0x4
+; CHECK-NEXT:    mov w8, #4
 ; CHECK-NEXT:    madd w0, w0, w1, w8
 ; CHECK-NEXT:    ret
   %1 = mul i32 %a, %b
@@ -39,7 +39,7 @@
 ; CHECK-FAST-LABEL: mul_add_imm2:
 ; CHECK-FAST:       ; %bb.0: ; %entry
 ; CHECK-FAST-NEXT:    mov x8, #-3
-; CHECK-FAST-NEXT:    orr x9, xzr, #0xfffffffffffffffd
+; CHECK-FAST-NEXT:    mov x9, #-3
 ; CHECK-FAST-NEXT:    madd x8, x8, x8, x9
 ; CHECK-FAST-NEXT:    mov x9, #45968
 ; CHECK-FAST-NEXT:    movk x9, #48484, lsl #16
Index: llvm/test/CodeGen/AArch64/mul_pow2.ll
===================================================================
--- llvm/test/CodeGen/AArch64/mul_pow2.ll
+++ llvm/test/CodeGen/AArch64/mul_pow2.ll
@@ -290,6 +290,26 @@
   ret i64 %sub
 }
 
+; The "mov" instructions may be hoisted out of a loop.
+define i32 @mull6_sub(i32 %x) {
+; CHECK-LABEL: mull6_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    mov w9, #-1
+; CHECK-NEXT:    madd w0, w0, w8, w9
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: mull6_sub:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    mov w9, #-1
+; GISEL-NEXT:    madd w0, w0, w8, w9
+; GISEL-NEXT:    ret
+  %mul = mul nsw i32 %x, 6
+  %sub = add nsw i32 %mul, -1
+  ret i32 %sub
+}
+
 define i32 @test7(i32 %x) {
 ; CHECK-LABEL: test7:
 ; CHECK:       // %bb.0:
@@ -731,11 +751,11 @@
 ;
 ; GISEL-LABEL: muladd_demand_commute:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x8, .LCPI42_1
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI42_1]
-; GISEL-NEXT:    adrp x8, .LCPI42_0
+; GISEL-NEXT:    adrp x8, .LCPI43_1
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI43_1]
+; GISEL-NEXT:    adrp x8, .LCPI43_0
 ; GISEL-NEXT:    mla v1.4s, v0.4s, v2.4s
-; GISEL-NEXT:    ldr q0, [x8, :lo12:.LCPI42_0]
+; GISEL-NEXT:    ldr q0, [x8, :lo12:.LCPI43_0]
 ; GISEL-NEXT:    and v0.16b, v1.16b, v0.16b
 ; GISEL-NEXT:    ret
   %m = mul <4 x i32> %x,
Index: llvm/test/CodeGen/AArch64/srem-seteq.ll
===================================================================
--- llvm/test/CodeGen/AArch64/srem-seteq.ll
+++ llvm/test/CodeGen/AArch64/srem-seteq.ll
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: test_srem_odd_bit30:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #43691
-; CHECK-NEXT:    orr w9, wzr, #0x1
+; CHECK-NEXT:    mov w9, #1
 ; CHECK-NEXT:    movk w8, #27306, lsl #16
 ; CHECK-NEXT:    madd w8, w0, w8, w9
 ; CHECK-NEXT:    cmp w8, #3
@@ -64,7 +64,7 @@
 ; CHECK-LABEL: test_srem_odd_bit31:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #21845
-; CHECK-NEXT:    orr w9, wzr, #0x1
+; CHECK-NEXT:    mov w9, #1
 ; CHECK-NEXT:    movk w8, #54613, lsl #16
 ; CHECK-NEXT:    madd w8, w0, w8, w9
 ; CHECK-NEXT:    cmp w8, #3
@@ -122,7 +122,7 @@
 ; CHECK-LABEL: test_srem_even_bit30:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #20165
-; CHECK-NEXT:    orr w9, wzr, #0x8
+; CHECK-NEXT:    mov w9, #8
 ; CHECK-NEXT:    movk w8, #64748, lsl #16
 ; CHECK-NEXT:    madd w8, w0, w8, w9
 ; CHECK-NEXT:    ror w8, w8, #3
@@ -140,7 +140,7 @@
 ; CHECK-LABEL: test_srem_even_bit31:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1285
-; CHECK-NEXT:    orr w9, wzr, #0x2
+; CHECK-NEXT:    mov w9, #2
 ; CHECK-NEXT:    movk w8, #50437, lsl #16
 ; CHECK-NEXT:    madd w8, w0, w8, w9
 ; CHECK-NEXT:    ror w8, w8, #1
Index: llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
===================================================================
--- llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
+++ llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
@@ -137,11 +137,11 @@
 ; CHECK-LABEL: t32_6_3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #43691
-; CHECK-NEXT:    mov w9, #43691
+; CHECK-NEXT:    mov w9, #-1
 ; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    madd w8, w0, w8, w9
+; CHECK-NEXT:    mov w9, #43691
 ; CHECK-NEXT:    movk w9, #10922, lsl #16
-; CHECK-NEXT:    mul w8, w0, w8
-; CHECK-NEXT:    sub w8, w8, #1
 ; CHECK-NEXT:    ror w8, w8, #1
 ; CHECK-NEXT:    cmp w8, w9
 ; CHECK-NEXT:    cset w0, lo
@@ -209,8 +209,8 @@
 ; CHECK-LABEL: t8_3_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-85
-; CHECK-NEXT:    mul w8, w0, w8
-; CHECK-NEXT:    sub w8, w8, #86
+; CHECK-NEXT:    mov w9, #-86
+; CHECK-NEXT:    madd w8, w0, w8, w9
 ; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    cmp w8, #85
 ; CHECK-NEXT:    cset w0, lo
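
Note: AArch64_IMM::expandMOVImm() (llvm/lib/Target/AArch64/AArch64ExpandImm.cpp)
decomposes an immediate into a short sequence of MOVZ/MOVN/ORR/MOVK
instructions, and the rewritten combine fires whenever that sequence has
length one. That is a superset of the old AArch64_AM::processLogicalImmediate()
check, which accepted only ORR-encodable logical immediates. The standalone
C++ sketch below illustrates the MOVZ/MOVN half of the single-instruction
test on the #1147 / #-1147 immediates from addimm-mulimm.ll above; the
helper names are invented for illustration, and the ORR logical-immediate
case that expandMOVImm also accepts as a single instruction is omitted.

    #include <cstdint>
    #include <cstdio>

    // A value fits one MOVZ if at most one 16-bit chunk is non-zero
    // (MOVZ writes a shifted 16-bit chunk and zeroes the rest).
    static bool isSingleMovz(uint64_t Imm, unsigned BitSize) {
      unsigned NonZero = 0;
      for (unsigned Shift = 0; Shift < BitSize; Shift += 16)
        if ((Imm >> Shift) & 0xFFFFu)
          ++NonZero;
      return NonZero <= 1;
    }

    // A value fits one MOVN if at most one 16-bit chunk differs from
    // 0xFFFF (MOVN writes the bitwise NOT of a shifted 16-bit chunk).
    static bool isSingleMovn(uint64_t Imm, unsigned BitSize) {
      unsigned NotAllOnes = 0;
      for (unsigned Shift = 0; Shift < BitSize; Shift += 16)
        if (((Imm >> Shift) & 0xFFFFu) != 0xFFFFu)
          ++NotAllOnes;
      return NotAllOnes <= 1;
    }

    int main() {
      // 1147 (0x47b) fits one MOVZ; -1147 fits one MOVN. Neither is a
      // valid logical immediate, so the old ORR-based combine rejected
      // both (see the addimm-mulimm.ll hunks above).
      printf("1147:  movz=%d movn=%d\n", isSingleMovz(1147, 64),
             isSingleMovn(1147, 64));
      printf("-1147: movz=%d movn=%d\n", isSingleMovz(uint64_t(-1147), 64),
             isSingleMovn(uint64_t(-1147), 64));
      return 0;
    }

Compiled as plain C++ this prints movz=1 movn=0 for 1147 and movz=0 movn=1
for -1147, matching the "mov x9, #1147" / "mov x9, #-1147" instructions the
new code generates in addimm-mulimm.ll.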