Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5797,7 +5797,7 @@ case MachineCombinerPattern::MULADDXI_OP1: { // MUL I=A,B,0 // ADD R,I,Imm - // ==> ORR V, ZR, Imm + // ==> ORR V, ZR, Imm or MOV V, Imm // ==> MADD R,A,B,V // --- Create(MADD); const TargetRegisterClass *OrrRC; @@ -5825,13 +5825,23 @@ Imm = Imm << Val; } uint64_t UImm = SignExtend64(Imm, BitSize); - uint64_t Encoding; - if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) + // The immediate can be composed via a single instruction. + SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; + AArch64_IMM::expandMOVImm(UImm, BitSize, Insn); + if (Insn.size() != 1) return; - MachineInstrBuilder MIB1 = - BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) - .addReg(ZeroReg) - .addImm(Encoding); + auto MovI = Insn.begin(); + MachineInstrBuilder MIB1; + uint64_t Encoding; + // MOV is an alias for one of three instructions: movz, movn, and orr. 
+ if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) + MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) + .addReg(ZeroReg) + .addImm(Encoding); + else + MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR) + .addImm(MovI->Op1) + .addImm(MovI->Op2); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); @@ -5889,7 +5899,7 @@ case MachineCombinerPattern::MULSUBXI_OP1: { // MUL I=A,B,0 // SUB R,I, Imm - // ==> ORR V, ZR, -Imm + // ==> ORR V, ZR, -Imm or MOV V, -Imm // ==> MADD R,A,B,V // = -Imm + A*B // --- Create(MADD); const TargetRegisterClass *OrrRC; @@ -5916,13 +5926,23 @@ Imm = Imm << Val; } uint64_t UImm = SignExtend64(-Imm, BitSize); - uint64_t Encoding; - if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) + // The immediate can be composed via a single instruction. + SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; + AArch64_IMM::expandMOVImm(UImm, BitSize, Insn); + if (Insn.size() != 1) return; - MachineInstrBuilder MIB1 = - BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) - .addReg(ZeroReg) - .addImm(Encoding); + auto MovI = Insn.begin(); + MachineInstrBuilder MIB1; + uint64_t Encoding; + // MOV is an alias for one of three instructions: movz, movn, and orr. 
+ if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) + MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) + .addReg(ZeroReg) + .addImm(Encoding); + else + MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR) + .addImm(MovI->Op1) + .addImm(MovI->Op2); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); Index: llvm/test/CodeGen/AArch64/addimm-mulimm.ll =================================================================== --- llvm/test/CodeGen/AArch64/addimm-mulimm.ll +++ llvm/test/CodeGen/AArch64/addimm-mulimm.ll @@ -5,8 +5,8 @@ ; CHECK-LABEL: addimm_mulimm_accept_00: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #37 -; CHECK-NEXT: mul x8, x0, x8 -; CHECK-NEXT: add x0, x8, #1147 +; CHECK-NEXT: mov x9, #1147 +; CHECK-NEXT: madd x0, x0, x8, x9 ; CHECK-NEXT: ret %tmp0 = add i64 %a, 31 %tmp1 = mul i64 %tmp0, 37 @@ -17,8 +17,8 @@ ; CHECK-LABEL: addimm_mulimm_accept_01: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #37 -; CHECK-NEXT: mul x8, x0, x8 -; CHECK-NEXT: sub x0, x8, #1147 +; CHECK-NEXT: mov x9, #-1147 +; CHECK-NEXT: madd x0, x0, x8, x9 ; CHECK-NEXT: ret %tmp0 = add i64 %a, -31 %tmp1 = mul i64 %tmp0, 37 @@ -29,8 +29,8 @@ ; CHECK-LABEL: addimm_mulimm_accept_02: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #37 -; CHECK-NEXT: mul w8, w0, w8 -; CHECK-NEXT: add w0, w8, #1147 +; CHECK-NEXT: mov w9, #1147 +; CHECK-NEXT: madd w0, w0, w8, w9 ; CHECK-NEXT: ret %tmp0 = add i32 %a, 31 %tmp1 = mul i32 %tmp0, 37 @@ -41,8 +41,8 @@ ; CHECK-LABEL: addimm_mulimm_accept_03: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #37 -; CHECK-NEXT: mul w8, w0, w8 -; CHECK-NEXT: sub w0, w8, #1147 +; CHECK-NEXT: mov w9, #-1147 +; CHECK-NEXT: madd w0, w0, w8, w9 ; CHECK-NEXT: ret %tmp0 = add i32 %a, -31 %tmp1 = mul i32 %tmp0, 37 Index: llvm/test/CodeGen/AArch64/mul_pow2.ll =================================================================== --- llvm/test/CodeGen/AArch64/mul_pow2.ll 
+++ llvm/test/CodeGen/AArch64/mul_pow2.ll @@ -290,6 +290,26 @@ ret i64 %sub } +; We may hoist the "mov" instructions out of a loop +define i32 @mull6_sub(i32 %x) { +; CHECK-LABEL: mull6_sub: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: madd w0, w0, w8, w9 +; CHECK-NEXT: ret +; +; GISEL-LABEL: mull6_sub: +; GISEL: // %bb.0: +; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w9, #-1 +; GISEL-NEXT: madd w0, w0, w8, w9 +; GISEL-NEXT: ret + %mul = mul nsw i32 %x, 6 + %sub = add nsw i32 %mul, -1 + ret i32 %sub +} + define i32 @test7(i32 %x) { ; CHECK-LABEL: test7: ; CHECK: // %bb.0: @@ -731,11 +751,11 @@ ; ; GISEL-LABEL: muladd_demand_commute: ; GISEL: // %bb.0: -; GISEL-NEXT: adrp x8, .LCPI42_1 -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI42_1] -; GISEL-NEXT: adrp x8, .LCPI42_0 +; GISEL-NEXT: adrp x8, .LCPI43_1 +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI43_1] +; GISEL-NEXT: adrp x8, .LCPI43_0 ; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s -; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI42_0] +; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI43_0] ; GISEL-NEXT: and v0.16b, v1.16b, v0.16b ; GISEL-NEXT: ret %m = mul <4 x i32> %x, Index: llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll @@ -137,11 +137,11 @@ ; CHECK-LABEL: t32_6_3: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #43691 -; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: mov w9, #-1 ; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: madd w8, w0, w8, w9 +; CHECK-NEXT: mov w9, #43691 ; CHECK-NEXT: movk w9, #10922, lsl #16 -; CHECK-NEXT: mul w8, w0, w8 -; CHECK-NEXT: sub w8, w8, #1 ; CHECK-NEXT: ror w8, w8, #1 ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: cset w0, lo @@ -209,8 +209,8 @@ ; CHECK-LABEL: t8_3_2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-85 -; CHECK-NEXT: mul w8, w0, w8 -; CHECK-NEXT: sub w8, w8, #86 +; CHECK-NEXT: mov w9, #-86 +; CHECK-NEXT: madd 
w8, w0, w8, w9 ; CHECK-NEXT: and w8, w8, #0xff ; CHECK-NEXT: cmp w8, #85 ; CHECK-NEXT: cset w0, lo