diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5796,7 +5796,7 @@
   case MachineCombinerPattern::MULADDXI_OP1: {
     // MUL I=A,B,0
     // ADD R,I,Imm
-    // ==> ORR V, ZR, Imm
+    // ==> MOV V, Imm
    // ==> MADD R,A,B,V
     // --- Create(MADD);
     const TargetRegisterClass *OrrRC;
@@ -5824,13 +5824,31 @@
       Imm = Imm << Val;
     }
     uint64_t UImm = SignExtend64(Imm, BitSize);
-    uint64_t Encoding;
-    if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+    // The immediate can be composed via a single instruction.
+    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+    AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
+    if (Insn.size() != 1)
       return;
-    MachineInstrBuilder MIB1 =
-        BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
-            .addReg(ZeroReg)
-            .addImm(Encoding);
+    auto MovI = Insn.begin();
+    MachineInstrBuilder MIB1;
+    // MOV is an alias for one of three instructions: movz, movn, and orr.
+    if (MovI->Opcode == OrrOpc)
+      MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+                 .addReg(ZeroReg)
+                 .addImm(MovI->Op2);
+    else {
+      if (BitSize == 32)
+        assert((MovI->Opcode == AArch64::MOVNWi ||
+                MovI->Opcode == AArch64::MOVZWi) &&
+               "Expected opcode");
+      else
+        assert((MovI->Opcode == AArch64::MOVNXi ||
+                MovI->Opcode == AArch64::MOVZXi) &&
+               "Expected opcode");
+      MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR)
+                 .addImm(MovI->Op1)
+                 .addImm(MovI->Op2);
+    }
     InsInstrs.push_back(MIB1);
     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
@@ -5888,7 +5906,7 @@
   case MachineCombinerPattern::MULSUBXI_OP1: {
     // MUL I=A,B,0
     // SUB R,I, Imm
-    // ==> ORR V, ZR, -Imm
+    // ==> MOV V, -Imm
     // ==> MADD R,A,B,V // = -Imm + A*B
     // --- Create(MADD);
     const TargetRegisterClass *OrrRC;
@@ -5915,13 +5933,31 @@
       Imm = Imm << Val;
     }
     uint64_t UImm = SignExtend64(-Imm, BitSize);
-    uint64_t Encoding;
-    if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+    // The immediate can be composed via a single instruction.
+    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+    AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
+    if (Insn.size() != 1)
       return;
-    MachineInstrBuilder MIB1 =
-        BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
-            .addReg(ZeroReg)
-            .addImm(Encoding);
+    auto MovI = Insn.begin();
+    MachineInstrBuilder MIB1;
+    // MOV is an alias for one of three instructions: movz, movn, and orr.
+    if (MovI->Opcode == OrrOpc)
+      MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+                 .addReg(ZeroReg)
+                 .addImm(MovI->Op2);
+    else {
+      if (BitSize == 32)
+        assert((MovI->Opcode == AArch64::MOVNWi ||
+                MovI->Opcode == AArch64::MOVZWi) &&
+               "Expected opcode");
+      else
+        assert((MovI->Opcode == AArch64::MOVNXi ||
+                MovI->Opcode == AArch64::MOVZXi) &&
+               "Expected opcode");
+      MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR)
+                 .addImm(MovI->Op1)
+                 .addImm(MovI->Op2);
+    }
     InsInstrs.push_back(MIB1);
     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
diff --git a/llvm/test/CodeGen/AArch64/addimm-mulimm.ll b/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
--- a/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
@@ -5,8 +5,8 @@
 ; CHECK-LABEL: addimm_mulimm_accept_00:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: add x0, x8, #1147
+; CHECK-NEXT: mov x9, #1147
+; CHECK-NEXT: madd x0, x0, x8, x9
 ; CHECK-NEXT: ret
   %tmp0 = add i64 %a, 31
   %tmp1 = mul i64 %tmp0, 37
@@ -17,8 +17,8 @@
 ; CHECK-LABEL: addimm_mulimm_accept_01:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul x8, x0, x8
-; CHECK-NEXT: sub x0, x8, #1147
+; CHECK-NEXT: mov x9, #-1147
+; CHECK-NEXT: madd x0, x0, x8, x9
 ; CHECK-NEXT: ret
   %tmp0 = add i64 %a, -31
   %tmp1 = mul i64 %tmp0, 37
@@ -29,8 +29,8 @@
 ; CHECK-LABEL: addimm_mulimm_accept_02:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: add w0, w8, #1147
+; CHECK-NEXT: mov w9, #1147
+; CHECK-NEXT: madd w0, w0, w8, w9
 ; CHECK-NEXT: ret
   %tmp0 = add i32 %a, 31
   %tmp1 = mul i32 %tmp0, 37
@@ -41,8 +41,8 @@
 ; CHECK-LABEL: addimm_mulimm_accept_03:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: sub w0, w8, #1147
+; CHECK-NEXT: mov w9, #-1147
+; CHECK-NEXT: madd w0, w0, w8, w9
 ; CHECK-NEXT: ret
   %tmp0 = add i32 %a, -31
   %tmp1 = mul i32 %tmp0, 37
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll b/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
--- a/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: .cfi_offset w19, -8
 ; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: orr w8, wzr, #0x1
+; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: madd w19, w0, w0, w8
 ; CHECK-NEXT: mov w0, #4
 ; CHECK-NEXT: bl __cxa_allocate_exception
@@ -37,7 +37,7 @@
 ; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: and w8, w0, #0xff
 ; CHECK-NEXT: mov w0, #4
-; CHECK-NEXT: orr w9, wzr, #0x1
+; CHECK-NEXT: mov w9, #1
 ; CHECK-NEXT: madd w19, w8, w8, w9
 ; CHECK-NEXT: bl __cxa_allocate_exception
 ; CHECK-NEXT: bl OUTLINED_FUNCTION_0
diff --git a/llvm/test/CodeGen/AArch64/madd-combiner.ll b/llvm/test/CodeGen/AArch64/madd-combiner.ll
--- a/llvm/test/CodeGen/AArch64/madd-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/madd-combiner.ll
@@ -6,7 +6,7 @@
 define i32 @mul_add_imm(i32 %a, i32 %b) {
 ; CHECK-LABEL: mul_add_imm:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: orr w8, wzr, #0x4
+; CHECK-NEXT: mov w8, #4
 ; CHECK-NEXT: madd w0, w0, w1, w8
 ; CHECK-NEXT: ret
   %1 = mul i32 %a, %b
@@ -39,7 +39,7 @@
 ; CHECK-FAST-LABEL: mul_add_imm2:
 ; CHECK-FAST: ; %bb.0: ; %entry
 ; CHECK-FAST-NEXT: mov x8, #-3
-; CHECK-FAST-NEXT: orr x9, xzr, #0xfffffffffffffffd
+; CHECK-FAST-NEXT: mov x9, #-3
 ; CHECK-FAST-NEXT: madd x8, x8, x8, x9
 ; CHECK-FAST-NEXT: mov x9, #45968
 ; CHECK-FAST-NEXT: movk x9, #48484, lsl #16
diff --git a/llvm/test/CodeGen/AArch64/mul_pow2.ll b/llvm/test/CodeGen/AArch64/mul_pow2.ll
--- a/llvm/test/CodeGen/AArch64/mul_pow2.ll
+++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll
@@ -290,6 +290,45 @@
   ret i64 %sub
 }
 
+; The two "mov" instructions can be hoisted out of a loop, so mov + madd is
+; preferable to mul + add.
+define i32 @mull6_sub(i32 %x) {
+; CHECK-LABEL: mull6_sub:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #6
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: madd w0, w0, w8, w9
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: mull6_sub:
+; GISEL: // %bb.0:
+; GISEL-NEXT: mov w8, #6
+; GISEL-NEXT: mov w9, #-1
+; GISEL-NEXT: madd w0, w0, w8, w9
+; GISEL-NEXT: ret
+  %mul = mul nsw i32 %x, 6
+  %sub = add nsw i32 %mul, -1
+  ret i32 %sub
+}
+
+define i64 @mull6_sub_orr(i64 %x) {
+; CHECK-LABEL: mull6_sub_orr:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #6
+; CHECK-NEXT: mov x9, #16773120
+; CHECK-NEXT: madd x0, x0, x8, x9
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: mull6_sub_orr:
+; GISEL: // %bb.0:
+; GISEL-NEXT: mov w8, #6
+; GISEL-NEXT: mov x9, #16773120
+; GISEL-NEXT: madd x0, x0, x8, x9
+; GISEL-NEXT: ret
+  %mul = mul nsw i64 %x, 6
+  %sub = add nsw i64 %mul, 16773120
+  ret i64 %sub
+}
+
 define i32 @test7(i32 %x) {
 ; CHECK-LABEL: test7:
 ; CHECK: // %bb.0:
@@ -731,11 +770,11 @@
 ;
 ; GISEL-LABEL: muladd_demand_commute:
 ; GISEL: // %bb.0:
-; GISEL-NEXT: adrp x8, .LCPI42_1
-; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI42_1]
-; GISEL-NEXT: adrp x8, .LCPI42_0
+; GISEL-NEXT: adrp x8, .LCPI44_1
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI44_1]
+; GISEL-NEXT: adrp x8, .LCPI44_0
 ; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s
-; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI42_0]
+; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI44_0]
 ; GISEL-NEXT: and v0.16b, v1.16b, v0.16b
 ; GISEL-NEXT: ret
   %m = mul <4 x i32> %x,
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq.ll b/llvm/test/CodeGen/AArch64/srem-seteq.ll
--- a/llvm/test/CodeGen/AArch64/srem-seteq.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq.ll
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: test_srem_odd_bit30:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #43691
-; CHECK-NEXT: orr w9, wzr, #0x1
+; CHECK-NEXT: mov w9, #1
 ; CHECK-NEXT: movk w8, #27306, lsl #16
 ; CHECK-NEXT: madd w8, w0, w8, w9
 ; CHECK-NEXT: cmp w8, #3
@@ -64,7 +64,7 @@
 ; CHECK-LABEL: test_srem_odd_bit31:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #21845
-; CHECK-NEXT: orr w9, wzr, #0x1
+; CHECK-NEXT: mov w9, #1
 ; CHECK-NEXT: movk w8, #54613, lsl #16
 ; CHECK-NEXT: madd w8, w0, w8, w9
 ; CHECK-NEXT: cmp w8, #3
@@ -122,7 +122,7 @@
 ; CHECK-LABEL: test_srem_even_bit30:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #20165
-; CHECK-NEXT: orr w9, wzr, #0x8
+; CHECK-NEXT: mov w9, #8
 ; CHECK-NEXT: movk w8, #64748, lsl #16
 ; CHECK-NEXT: madd w8, w0, w8, w9
 ; CHECK-NEXT: ror w8, w8, #3
@@ -140,7 +140,7 @@
 ; CHECK-LABEL: test_srem_even_bit31:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #1285
-; CHECK-NEXT: orr w9, wzr, #0x2
+; CHECK-NEXT: mov w9, #2
 ; CHECK-NEXT: movk w8, #50437, lsl #16
 ; CHECK-NEXT: madd w8, w0, w8, w9
 ; CHECK-NEXT: ror w8, w8, #1
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
--- a/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
@@ -137,11 +137,11 @@
 ; CHECK-LABEL: t32_6_3:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #43691
-; CHECK-NEXT: mov w9, #43691
+; CHECK-NEXT: mov w9, #-1
 ; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: madd w8, w0, w8, w9
+; CHECK-NEXT: mov w9, #43691
 ; CHECK-NEXT: movk w9, #10922, lsl #16
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: sub w8, w8, #1
 ; CHECK-NEXT: ror w8, w8, #1
 ; CHECK-NEXT: cmp w8, w9
 ; CHECK-NEXT: cset w0, lo
@@ -209,8 +209,8 @@
 ; CHECK-LABEL: t8_3_2:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #-85
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: sub w8, w8, #86
+; CHECK-NEXT: mov w9, #-86
+; CHECK-NEXT: madd w8, w0, w8, w9
 ; CHECK-NEXT: and w8, w8, #0xff
 ; CHECK-NEXT: cmp w8, #85
 ; CHECK-NEXT: cset w0, lo
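
Note on the new gate: AArch64_IMM::expandMOVImm decomposes an immediate into a MOVZ/MOVN/ORR/MOVK sequence, and the combiner now fires only when that sequence is a single instruction. The sketch below is a minimal, self-contained model of the MOVZ/MOVN half of that predicate for 64-bit immediates; the helpers isSingleMOVZ and isSingleMOVN are invented for illustration and are not LLVM APIs, and the real expandMOVImm additionally recognizes logical immediates (the ORR case that processLogicalImmediate already handled).

// Simplified model (not LLVM code) of expandMOVImm's single-instruction
// MOVZ/MOVN cases for 64-bit immediates. Helper names are hypothetical.
#include <cstdint>
#include <cstdio>

// One MOVZ suffices when at most one 16-bit chunk of the value is non-zero.
static bool isSingleMOVZ(uint64_t Imm) {
  for (unsigned Shift = 0; Shift < 64; Shift += 16)
    if ((Imm & ~(0xFFFFULL << Shift)) == 0)
      return true;
  return false;
}

// One MOVN suffices when at most one 16-bit chunk differs from all-ones.
static bool isSingleMOVN(uint64_t Imm) {
  for (unsigned Shift = 0; Shift < 64; Shift += 16)
    if ((Imm | (0xFFFFULL << Shift)) == ~0ULL)
      return true;
  return false;
}

int main() {
  // 1147 (addimm_mulimm_accept_00): a single MOVZ, but not a legal logical
  // immediate, so the old processLogicalImmediate-based gate rejected it
  // and the madd fusion was missed.
  printf("1147:     movz=%d movn=%d\n", isSingleMOVZ(1147), isSingleMOVN(1147));
  // -1147 (addimm_mulimm_accept_01): a single MOVN.
  printf("-1147:    movz=%d movn=%d\n", isSingleMOVZ(uint64_t(-1147)),
         isSingleMOVN(uint64_t(-1147)));
  // 16773120 = 0xfff000 (mull6_sub_orr): neither MOVZ nor MOVN, but it is a
  // legal logical immediate, so expandMOVImm still returns a single ORR.
  printf("16773120: movz=%d movn=%d\n", isSingleMOVZ(16773120),
         isSingleMOVN(16773120));
  return 0;
}

The effect is visible throughout the test diffs: mul + add/sub with such immediates now folds to mov + madd, and the extra mov is loop-invariant, as the mull6_sub comment notes.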