diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -60,12 +60,13 @@ MachineLoopInfo *MLI; MachineRegisterInfo *MRI; + using OpcodePair = std::pair; template using SplitAndOpcFunc = - std::function(T, unsigned, T &, T &)>; + std::function(T, unsigned, T &, T &)>; using BuildMIFunc = - std::function; + std::function; /// For instructions where an immediate operand could be split into two /// separate immediate instructions, use the splitTwoPartImm two handle the @@ -90,6 +91,10 @@ MachineInstr *&SubregToRegMI); template + bool visitGenericADDSUB(OpcodePair PosOpcs, OpcodePair NegOpcs, + MachineInstr &MI, + SmallSetVector &ToBeRemoved); + template bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI, SmallSetVector &ToBeRemoved); template @@ -171,20 +176,20 @@ return splitTwoPartImm( MI, ToBeRemoved, - [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional { + [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional { if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1)) - return Opc; + return std::make_pair(Opc, Opc); return None; }, - [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0, + [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, unsigned Imm1, Register SrcReg, Register NewTmpReg, Register NewDstReg) { DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock *MBB = MI.getParent(); - BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg) + BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) .addReg(SrcReg) .addImm(Imm0); - BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg) + BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) .addReg(NewTmpReg) .addImm(Imm1); }); @@ -254,8 +259,8 @@ } template -bool AArch64MIPeepholeOpt::visitADDSUB( - unsigned PosOpc, unsigned NegOpc, MachineInstr &MI, +bool AArch64MIPeepholeOpt::visitGenericADDSUB( + OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI, SmallSetVector &ToBeRemoved) { // Try below transformation. // @@ -272,30 +277,38 @@ return splitTwoPartImm( MI, ToBeRemoved, - [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0, - T &Imm1) -> Optional { + [PosOpcs, NegOpcs](T Imm, unsigned RegSize, T &Imm0, + T &Imm1) -> Optional { if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) - return PosOpc; + return PosOpcs; if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) - return NegOpc; + return NegOpcs; return None; }, - [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0, + [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, unsigned Imm1, Register SrcReg, Register NewTmpReg, Register NewDstReg) { DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock *MBB = MI.getParent(); - BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg) + BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) .addReg(SrcReg) .addImm(Imm0) .addImm(12); - BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg) + BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) .addReg(NewTmpReg) .addImm(Imm1) .addImm(0); }); } +template +bool AArch64MIPeepholeOpt::visitADDSUB( + unsigned PosOpc, unsigned NegOpc, MachineInstr &MI, + SmallSetVector &ToBeRemoved) { + return visitGenericADDSUB({PosOpc, PosOpc}, {NegOpc, NegOpc}, MI, + ToBeRemoved); +} + // Checks if the corresponding MOV immediate instruction is applicable for // this peephole optimization. bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI, @@ -357,7 +370,7 @@ // number since it was sign extended when we assign to the 64-bit Imm. if (SubregToRegMI) Imm &= 0xFFFFFFFF; - unsigned Opcode; + OpcodePair Opcode; if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1)) Opcode = R.getValue(); else @@ -365,16 +378,18 @@ // Create new ADD/SUB MIs. MachineFunction *MF = MI.getMF(); - const TargetRegisterClass *RC = - TII->getRegClass(TII->get(Opcode), 0, TRI, *MF); + const TargetRegisterClass *RC0 = + TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF); + const TargetRegisterClass *RC1 = + TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF); const TargetRegisterClass *ORC = - TII->getRegClass(TII->get(Opcode), 1, TRI, *MF); + TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); - Register NewTmpReg = MRI->createVirtualRegister(RC); - Register NewDstReg = MRI->createVirtualRegister(RC); + Register NewTmpReg = MRI->createVirtualRegister(RC0); + Register NewDstReg = MRI->createVirtualRegister(RC1); - MRI->constrainRegClass(SrcReg, RC); + MRI->constrainRegClass(SrcReg, ORC); MRI->constrainRegClass(NewTmpReg, ORC); MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg)); @@ -439,6 +454,26 @@ Changed = visitADDSUB(AArch64::SUBXri, AArch64::ADDXri, MI, ToBeRemoved); break; + case AArch64::ADDSWrr: + Changed = visitGenericADDSUB( + {AArch64::ADDWri, AArch64::ADDSWri}, + {AArch64::SUBWri, AArch64::SUBSWri}, MI, ToBeRemoved); + break; + case AArch64::SUBSWrr: + Changed = visitGenericADDSUB( + {AArch64::SUBWri, AArch64::SUBSWri}, + {AArch64::ADDWri, AArch64::ADDSWri}, MI, ToBeRemoved); + break; + case AArch64::ADDSXrr: + Changed = visitGenericADDSUB( + {AArch64::ADDXri, AArch64::ADDSXri}, + {AArch64::SUBXri, AArch64::SUBSXri}, MI, ToBeRemoved); + break; + case AArch64::SUBSXrr: + Changed = visitGenericADDSUB( + {AArch64::SUBXri, AArch64::SUBSXri}, + {AArch64::ADDXri, AArch64::ADDSXri}, MI, ToBeRemoved); + break; } } } diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll --- a/llvm/test/CodeGen/AArch64/addsub.ll +++ b/llvm/test/CodeGen/AArch64/addsub.ll @@ -406,4 +406,103 @@ ret i64 %b } -; TODO: adds/subs +; ADDS and SUBS Optimizations +define i1 @eq_i(i32 %0) { +; CHECK-LABEL: eq_i: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w8, w0, #273, lsl #12 // =1118208 +; CHECK-NEXT: cmp w8, #273 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %2 = icmp eq i32 %0, 1118481 + ret i1 %2 +} + +define i1 @eq_l(i64 %0) { +; CHECK-LABEL: eq_l: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #273, lsl #12 // =1118208 +; CHECK-NEXT: cmp x8, #273 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %2 = icmp eq i64 %0, 1118481 + ret i1 %2 +} + +define i1 @ne_i(i32 %0) { +; CHECK-LABEL: ne_i: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w8, w0, #273, lsl #12 // =1118208 +; CHECK-NEXT: cmp w8, #273 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %2 = icmp ne i32 %0, 1118481 + ret i1 %2 +} + +define i1 @ne_l(i64 %0) { +; CHECK-LABEL: ne_l: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #273, lsl #12 // =1118208 +; CHECK-NEXT: cmp x8, #273 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %2 = icmp ne i64 %0, 1118481 + ret i1 %2 +} + +define i1 @eq_in(i32 %0) { +; CHECK-LABEL: eq_in: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #273, lsl #12 // =1118208 +; CHECK-NEXT: cmn w8, #273 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %2 = icmp eq i32 %0, -1118481 + ret i1 %2 +} + +define i1 @eq_ln(i64 %0) { +; CHECK-LABEL: eq_ln: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #273, lsl #12 // =1118208 +; CHECK-NEXT: cmn x8, #273 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %2 = icmp eq i64 %0, -1118481 + ret i1 %2 +} + +define i1 @ne_in(i32 %0) { +; CHECK-LABEL: ne_in: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #273, lsl #12 // =1118208 +; CHECK-NEXT: cmn w8, #273 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %2 = icmp ne i32 %0, -1118481 + ret i1 %2 +} + +define i1 @ne_ln(i64 %0) { +; CHECK-LABEL: ne_ln: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #273, lsl #12 // =1118208 +; CHECK-NEXT: cmn x8, #273 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %2 = icmp ne i64 %0, -1118481 + ret i1 %2 +} + +define i1 @reject_eq(i32 %0) { +; CHECK-LABEL: reject_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #51712 +; CHECK-NEXT: movk w8, #15258, lsl #16 +; CHECK-NEXT: cmp w0, w8 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %2 = icmp eq i32 %0, 1000000000 + ret i1 %2 +} diff --git a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll --- a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll +++ b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll @@ -11,9 +11,8 @@ ; YAML: - BasicBlock: entry ; YAML: - INST_add: '2' ; YAML: - INST_b.: '1' -; YAML: - INST_ldr: '1' -; YAML: - INST_movk: '1' -; YAML: - INST_movz: '1' +; YAML: - INST_orr: '1' +; YAML: - INST_sub: '1' ; YAML: - INST_subs: '1' ; YAML: Name: InstructionMix @@ -27,13 +26,12 @@ define i32 @foo(i32* %ptr, i32 %x, i64 %y) !dbg !3 { ; CHECK-LABEL: foo: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: ldr w10, [x0] +; CHECK-NEXT: ldr w9, [x0] ; CHECK-NEXT: mov x8, x0 -; CHECK-NEXT: mov w9, #16959 -; CHECK-NEXT: movk w9, #15, lsl #16 -; CHECK-NEXT: add w0, w10, w1 -; CHECK-NEXT: add x10, x0, x2 -; CHECK-NEXT: cmp x10, x9 +; CHECK-NEXT: add w0, w9, w1 +; CHECK-NEXT: add x9, x0, x2 +; CHECK-NEXT: sub x9, x9, #244, lsl #12 ; =999424 +; CHECK-NEXT: cmp x9, #575 ; CHECK-NEXT: b.eq LBB0_2 ; CHECK-NEXT: ; %bb.1: ; %else ; CHECK-NEXT: mul w9, w0, w1 diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -9,10 +9,9 @@ ; CHECK-NEXT: movk w8, #8026, lsl #16 ; CHECK-NEXT: movk w9, #41, lsl #16 ; CHECK-NEXT: madd w8, w0, w8, w9 -; CHECK-NEXT: mov w9, #48987 -; CHECK-NEXT: movk w9, #82, lsl #16 ; CHECK-NEXT: and w8, w8, #0x1fffffff -; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: sub w8, w8, #1323, lsl #12 // =5419008 +; CHECK-NEXT: cmp w8, #3931 ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %srem = srem i29 %X, 99 diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll @@ -24,9 +24,8 @@ ; CHECK-NEXT: lsl w9, w8, #26 ; CHECK-NEXT: bfxil w9, w8, #1, #26 ; CHECK-NEXT: and w8, w9, #0x7ffffff -; CHECK-NEXT: mov w9, #18725 -; CHECK-NEXT: movk w9, #146, lsl #16 -; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: sub w8, w8, #2340, lsl #12 // =9584640 +; CHECK-NEXT: cmp w8, #2341 ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %urem = urem i27 %X, 14