Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -12,6 +12,7 @@
 
+#include "AArch64ExpandImm.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1619,6 +1619,96 @@
 def : InstAlias<"negs $dst, $src$shift",
                 (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
 
+// Optimize ([add|sub] r, imm) ->
+//          ([ADD|SUB] ([ADD|SUB] r, #imm0, lsl #12), #imm1) if possible.
+def PTwoPartsAddImm : PatLeaf<(imm), [{
+  int64_t I = N->getSExtValue();
+  // The imm must be in the form of ((imm0 << 12) + imm1), in which both imm0
+  // and imm1 are non-zero 12-bit unsigned int.
+  if ((I & ~0xffffffll) != 0 || (I & 0xfff000) == 0 || (I & 0xfff) == 0)
+    return false;
+  // Only match when materializing the imm with MOVs would itself take at
+  // least two instructions.
+  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+  const EVT VT = N->getValueType(0);
+  AArch64_IMM::expandMOVImm(N->getZExtValue(), VT.getSizeInBits(), Insn);
+  return Insn.size() > 1;
+}]>;
+
+// Yields imm0: bits [23:12] of the immediate, used with "lsl #12".
+def PTwoPartsAddImmHigh : SDNodeXForm<imm, [{
+  int64_t I = N->getSExtValue() >> 12;
+  return CurDAG->getTargetConstant(I & 0xfff, SDLoc(N), N->getValueType(0));
+}]>;
+
+// Yields imm1: bits [11:0] of the immediate.
+def PTwoPartsAddImmLow : SDNodeXForm<imm, [{
+  int64_t I = N->getSExtValue();
+  return CurDAG->getTargetConstant(I & 0xfff, SDLoc(N), N->getValueType(0));
+}]>;
+
+def : Pat<(add GPR64:$R, PTwoPartsAddImm:$imm),
+          (ADDXri (ADDXri GPR64:$R, (PTwoPartsAddImmHigh PTwoPartsAddImm:$imm),
+                   12),
+           (PTwoPartsAddImmLow PTwoPartsAddImm:$imm), 0)>;
+def : Pat<(add GPR32:$R, PTwoPartsAddImm:$imm),
+          (ADDWri (ADDWri GPR32:$R, (PTwoPartsAddImmHigh PTwoPartsAddImm:$imm),
+                   12),
+           (PTwoPartsAddImmLow PTwoPartsAddImm:$imm), 0)>;
+def : Pat<(sub GPR64:$R, PTwoPartsAddImm:$imm),
+          (SUBXri (SUBXri GPR64:$R, (PTwoPartsAddImmHigh PTwoPartsAddImm:$imm),
+                   12),
+           (PTwoPartsAddImmLow PTwoPartsAddImm:$imm), 0)>;
+def : Pat<(sub GPR32:$R, PTwoPartsAddImm:$imm),
+          (SUBWri (SUBWri GPR32:$R, (PTwoPartsAddImmHigh PTwoPartsAddImm:$imm),
+                   12),
+           (PTwoPartsAddImmLow PTwoPartsAddImm:$imm), 0)>;
+
+// Optimize ([add|sub] r, imm) ->
+//          ([SUB|ADD] ([SUB|ADD] r, #imm0, lsl #12), #imm1) if possible.
+def NTwoPartsAddImm : PatLeaf<(imm), [{
+  // Negate as unsigned so that the minimum signed value cannot overflow.
+  uint64_t I = 0 - static_cast<uint64_t>(N->getSExtValue());
+  // The -imm must be in the form of ((imm0 << 12) + imm1), in which both imm0
+  // and imm1 are non-zero 12-bit unsigned int.
+  if ((I & ~0xffffffull) != 0 || (I & 0xfff000) == 0 || (I & 0xfff) == 0)
+    return false;
+  // Only match when materializing the imm with MOVs would itself take at
+  // least two instructions.
+  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+  const EVT VT = N->getValueType(0);
+  AArch64_IMM::expandMOVImm(N->getZExtValue(), VT.getSizeInBits(), Insn);
+  return Insn.size() > 1;
+}]>;
+
+// Yields imm0: bits [23:12] of the negated immediate, used with "lsl #12".
+def NTwoPartsAddImmHigh : SDNodeXForm<imm, [{
+  uint64_t I = (0 - static_cast<uint64_t>(N->getSExtValue())) >> 12;
+  return CurDAG->getTargetConstant(I & 0xfff, SDLoc(N), N->getValueType(0));
+}]>;
+
+// Yields imm1: bits [11:0] of the negated immediate.
+def NTwoPartsAddImmLow : SDNodeXForm<imm, [{
+  uint64_t I = 0 - static_cast<uint64_t>(N->getSExtValue());
+  return CurDAG->getTargetConstant(I & 0xfff, SDLoc(N), N->getValueType(0));
+}]>;
+
+def : Pat<(add GPR64:$R, NTwoPartsAddImm:$imm),
+          (SUBXri (SUBXri GPR64:$R, (NTwoPartsAddImmHigh NTwoPartsAddImm:$imm),
+                   12),
+           (NTwoPartsAddImmLow NTwoPartsAddImm:$imm), 0)>;
+def : Pat<(add GPR32:$R, NTwoPartsAddImm:$imm),
+          (SUBWri (SUBWri GPR32:$R, (NTwoPartsAddImmHigh NTwoPartsAddImm:$imm),
+                   12),
+           (NTwoPartsAddImmLow NTwoPartsAddImm:$imm), 0)>;
+def : Pat<(sub GPR64:$R, NTwoPartsAddImm:$imm),
+          (ADDXri (ADDXri GPR64:$R, (NTwoPartsAddImmHigh NTwoPartsAddImm:$imm),
+                   12),
+           (NTwoPartsAddImmLow NTwoPartsAddImm:$imm), 0)>;
+def : Pat<(sub GPR32:$R, NTwoPartsAddImm:$imm),
+          (ADDWri (ADDWri GPR32:$R, (NTwoPartsAddImmHigh NTwoPartsAddImm:$imm),
+                   12),
+           (NTwoPartsAddImmLow NTwoPartsAddImm:$imm), 0)>;
 
 // Unsigned/Signed divide
 defm UDIV : Div<0, "udiv", udiv>;
Index: llvm/test/CodeGen/AArch64/addimm-mulimm.ll
===================================================================
--- llvm/test/CodeGen/AArch64/addimm-mulimm.ll
+++ llvm/test/CodeGen/AArch64/addimm-mulimm.ll
@@ -52,10 +52,10 @@
 define i64 @addimm_mulimm_accept_10(i64 %a) {
 ; CHECK-LABEL: addimm_mulimm_accept_10:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w9, #32888
 ; CHECK-NEXT:    mov w8, #37
-; CHECK-NEXT:    movk w9, #17, lsl #16
-; CHECK-NEXT:    madd x0, x0, x8, x9
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    add x8, x8, #280, lsl #12 // =1146880
+; CHECK-NEXT:    add x0, x8, #120
 ; CHECK-NEXT:    ret
   %tmp0 = add i64 %a, 31000
   %tmp1 = mul i64 %tmp0, 37
@@ -65,10 +65,10 @@
 define i64 @addimm_mulimm_accept_11(i64 %a) {
 ; CHECK-LABEL: addimm_mulimm_accept_11:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x9, #-32888
 ; CHECK-NEXT:    mov w8, #37
-; CHECK-NEXT:    movk x9, #65518, lsl #16
-; CHECK-NEXT:    madd x0, x0, x8, x9
+; CHECK-NEXT:    mul x8, x0, x8
+; CHECK-NEXT:    sub x8, x8, #280, lsl #12 // =1146880
+; CHECK-NEXT:    sub x0, x8, #120
 ; CHECK-NEXT:    ret
   %tmp0 = add i64 %a, -31000
   %tmp1 = mul i64 %tmp0, 37
@@ -78,10 +78,10 @@
 define signext i32 @addimm_mulimm_accept_12(i32 signext %a) {
 ; CHECK-LABEL: addimm_mulimm_accept_12:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w9, #32888
 ; CHECK-NEXT:    mov w8, #37
-; CHECK-NEXT:    movk w9, #17, lsl #16
-; CHECK-NEXT:    madd w0, w0, w8, w9
+; CHECK-NEXT:    mul w8, w0, w8
+; CHECK-NEXT:    add w8, w8, #280, lsl #12 // =1146880
+; CHECK-NEXT:    add w0, w8, #120
 ; CHECK-NEXT:    ret
   %tmp0 = add i32 %a, 31000
   %tmp1 = mul i32 %tmp0, 37
@@ -91,10 +91,10 @@
 define signext i32 @addimm_mulimm_accept_13(i32 signext %a) {
 ; CHECK-LABEL: addimm_mulimm_accept_13:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w9, #32648
 ; CHECK-NEXT:    mov w8, #37
-; CHECK-NEXT:    movk w9, #65518, lsl #16
-; CHECK-NEXT:    madd w0, w0, w8, w9
+; CHECK-NEXT:    mul w8, w0, w8
+; CHECK-NEXT:    sub w8, w8, #280, lsl #12 // =1146880
+; CHECK-NEXT:    sub w0, w8, #120
 ; CHECK-NEXT:    ret
   %tmp0 = add i32 %a, -31000
   %tmp1 = mul i32 %tmp0, 37
Index: llvm/test/CodeGen/AArch64/addsub.ll
===================================================================
--- llvm/test/CodeGen/AArch64/addsub.ll
+++ llvm/test/CodeGen/AArch64/addsub.ll
@@ -152,9 +152,8 @@
 define i64 @add_two_parts_imm_i64(i64 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    add x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = add i64 %a, 11183445
   ret i64 %b
@@ -163,9 +162,8 @@
 define i32 @add_two_parts_imm_i32(i32 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    add w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = add i32 %a, 11183445
   ret i32 %b
@@ -174,9 +172,8 @@
 define i64 @add_two_parts_imm_i64_neg(i64 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i64_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-42325
-; CHECK-NEXT:    movk x8, #65365, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    sub x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = add i64 %a, -11183445
   ret i64 %b
@@ -185,9 +182,8 @@
 define i32 @add_two_parts_imm_i32_neg(i32 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i32_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23211
-; CHECK-NEXT:    movk w8, #65365, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    sub w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = add i32 %a, -11183445
   ret i32 %b
@@ -196,9 +192,8 @@
 define i64 @sub_two_parts_imm_i64(i64 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-42325
-; CHECK-NEXT:    movk x8, #65365, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    sub x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i64 %a, 11183445
   ret i64 %b
@@ -207,9 +202,8 @@
 define i32 @sub_two_parts_imm_i32(i32 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23211
-; CHECK-NEXT:    movk w8, #65365, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    sub w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i32 %a, 11183445
   ret i32 %b
@@ -218,9 +212,8 @@
 define i64 @sub_two_parts_imm_i64_neg(i64 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i64_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    add x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i64 %a, -11183445
   ret i64 %b
@@ -229,9 +222,8 @@
 define i32 @sub_two_parts_imm_i32_neg(i32 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i32_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    add w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i32 %a, -11183445
   ret i32 %b
Index: llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
===================================================================
--- llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
+++ llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
@@ -5,10 +5,10 @@
 ; CHECK-LABEL: test_srem_odd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #33099
-; CHECK-NEXT:    mov w9, #24493
 ; CHECK-NEXT:    movk w8, #8026, lsl #16
-; CHECK-NEXT:    movk w9, #41, lsl #16
-; CHECK-NEXT:    madd w8, w0, w8, w9
+; CHECK-NEXT:    mul w8, w0, w8
+; CHECK-NEXT:    add w8, w8, #661, lsl #12 // =2707456
+; CHECK-NEXT:    add w8, w8, #4013
 ; CHECK-NEXT:    mov w9, #48987
 ; CHECK-NEXT:    and w8, w8, #0x1fffffff
 ; CHECK-NEXT:    movk w9, #82, lsl #16
Index: llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
===================================================================
--- llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
+++ llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
@@ -135,8 +135,12 @@
 
 while_cond:
   %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
-; CHECK: mov w{{[0-9]+}}, #14464
+; CHECK-NOT: mov w{{[0-9]+}}, #14464
 ; CHECK-NOT: mov w{{[0-9]+}}, #14468
+; CHECK-NOT: movk w{{[0-9]+}}, #1, lsl #16
+; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #19, lsl #12
+; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #2176
+; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, #2180
   %gep0 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20000
   %gep1 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20001
   %cmp = icmp slt i32 %phi, %n