Index: lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -777,6 +777,21 @@
   return false;
 }
 
+// Check if the given immediate is preferred by ADD. Return true if the
+// immediate can be encoded in an ADD, or if it can be encoded in an
+// "ADD LSL #12" and can not be materialized by a single MOVZ.
+static bool isPreferredADD(int64_t ImmOff) {
+  // Constant in [0x0, 0xfff] can be encoded in ADD.
+  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+    return true;
+  // Check if it can be encoded in an "ADD LSL #12".
+  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
+    // As a single MOVZ is faster than an "ADD LSL #12", skip such constants.
+    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+  return false;
+}
+
 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                             SDValue &Base, SDValue &Offset,
                                             SDValue &SignExtend,
@@ -786,11 +801,6 @@
   SDValue LHS = N.getOperand(0);
   SDValue RHS = N.getOperand(1);
 
-  // We don't want to match immediate adds here, because they are better lowered
-  // to the register-immediate addressing modes.
-  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
-    return false;
-
   // Check if this particular node is reused in any non-memory related
   // operation. If yes, do not try to fold this node into the address
   // computation, since the computation will be kept.
@@ -800,6 +810,36 @@
     return false;
   }
 
+  // Watch out if RHS is a wide immediate: it cannot be selected into the
+  // [BaseReg+Imm] addressing mode, and it may not be encodable in an
+  // ADD/SUB either. Such an address would fall back to [BaseReg + 0] mode
+  // and generate instructions like:
+  //   MOV  X0, WideImmediate
+  //   ADD  X1, BaseReg, X0
+  //   LDR  X2, [X1, 0]
+  // In such a situation, the [BaseReg, XReg] addressing mode saves one
+  // ADD/SUB:
+  //   MOV  X0, WideImmediate
+  //   LDR  X2, [BaseReg, X0]
+  if (isa<ConstantSDNode>(RHS)) {
+    int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue();
+    unsigned Scale = Log2_32(Size);
+    // Skip immediates that can be selected in the load/store addressing
+    // mode. Also skip immediates that can be encoded by a single ADD
+    // (SUB is also checked by using -ImmOff).
+    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
+        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+      return false;
+
+    SDLoc DL(N.getNode());
+    SDValue Ops[] = { RHS };
+    SDNode *MOVI =
+        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
+    SDValue MOVIV = SDValue(MOVI, 0);
+    // This ADD of two X registers will be selected into [Reg+Reg] mode.
+    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
+  }
+
   // Remember if it is worth folding N when it produces extended register.
   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
Index: test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
===================================================================
--- test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
+++ test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
@@ -12,7 +12,7 @@
 
 for.body:
 ; CHECK: for.body
-; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}]
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}]
 ; CHECK: add x[[REG:[0-9]+]],
 ; CHECK: x[[REG]], #1, lsl #12
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
Index: test/CodeGen/AArch64/arm64-addrmode.ll
===================================================================
--- test/CodeGen/AArch64/arm64-addrmode.ll
+++ test/CodeGen/AArch64/arm64-addrmode.ll
@@ -37,9 +37,8 @@
 
 ; base + unsigned offset (> imm12 * size of type in bytes)
 ; CHECK: @t4
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #8, lsl #12
-; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
+; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
+; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
 define void @t4() {
   %incdec.ptr = getelementptr inbounds i64* @object, i64 4096
@@ -60,9 +59,8 @@
 ; base + reg + imm
 ; CHECK: @t6
 ; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
-; CHECK-NEXT: add [[ADDREG]], [[ADDREG]], #8, lsl #12
-; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
+; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
+; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
 define void @t6(i64 %a) {
   %tmp1 = getelementptr inbounds i64* @object, i64 %a
@@ -70,3 +68,114 @@
   %tmp = load volatile i64* %incdec.ptr, align 8
   ret void
 }
+
+; Test base + wide immediate
+define void @t7(i64 %a) {
+; CHECK-LABEL: t7:
+; CHECK: orr w[[NUM:[0-9]+]], wzr, #0xffff
+; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]]
+  %1 = add i64 %a, 65535 ;0xffff
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t8(i64 %a) {
+; CHECK-LABEL: t8:
+; CHECK: movn [[REG:x[0-9]+]], #0x1235
+; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
+  %1 = sub i64 %a, 4662 ;-4662 is 0xffffffffffffedca
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t9(i64 %a) {
+; CHECK-LABEL: t9:
+; CHECK: movn [[REG:x[0-9]+]], #0x1235, lsl #16
+; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
+  %1 = add i64 -305463297, %a ;-305463297 is 0xffffffffedcaffff
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t10(i64 %a) {
+; CHECK-LABEL: t10:
+; CHECK: movz [[REG:x[0-9]+]], #0x123, lsl #48
+; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
+  %1 = add i64 %a, 81909218222800896 ;0x123000000000000
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t11(i64 %a) {
+; CHECK-LABEL: t11:
+; CHECK: movz w[[NUM:[0-9]+]], #0x123, lsl #16
+; CHECK: movk w[[NUM:[0-9]+]], #0x4567
+; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]]
+  %1 = add i64 %a, 19088743 ;0x1234567
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+; Test some boundaries that should not use movz/movn/orr
+define void @t12(i64 %a) {
+; CHECK-LABEL: t12:
+; CHECK: add [[REG:x[0-9]+]], x0, #4095
+; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
+  %1 = add i64 %a, 4095 ;0xfff
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t13(i64 %a) {
+; CHECK-LABEL: t13:
+; CHECK: sub [[REG:x[0-9]+]], x0, #4095
+; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
+  %1 = add i64 %a, -4095 ;-0xfff
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t14(i64 %a) {
+; CHECK-LABEL: t14:
+; CHECK: add [[REG:x[0-9]+]], x0, #291, lsl #12
+; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
+  %1 = add i64 %a, 1191936 ;0x123000
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t15(i64 %a) {
+; CHECK-LABEL: t15:
+; CHECK: sub [[REG:x[0-9]+]], x0, #291, lsl #12
+; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
+  %1 = add i64 %a, -1191936 ;0xFFFFFFFFFFEDD000
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t16(i64 %a) {
+; CHECK-LABEL: t16:
+; CHECK: ldr xzr, [x0, #28672]
+  %1 = add i64 %a, 28672 ;0x7000
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t17(i64 %a) {
+; CHECK-LABEL: t17:
+; CHECK: ldur xzr, [x0, #-256]
+  %1 = add i64 %a, -256 ;-0x100
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
Index: test/CodeGen/AArch64/fast-isel-addressing-modes.ll
===================================================================
--- test/CodeGen/AArch64/fast-isel-addressing-modes.ll
+++ test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -172,9 +172,12 @@
 
 ; Min un-supported scaled offset
 define i32 @load_breg_immoff_6(i64 %a) {
-; CHECK-LABEL: load_breg_immoff_6
-; CHECK: add [[REG:x[0-9]+]], x0, #4, lsl #12
-; CHECK-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+; SDAG-LABEL: load_breg_immoff_6
+; SDAG: orr w[[NUM:[0-9]+]], wzr, #0x4000
+; SDAG-NEXT: ldr {{w[0-9]+}}, [x0, x[[NUM]]]
+; FAST-LABEL: load_breg_immoff_6
+; FAST: add [[REG:x[0-9]+]], x0, #4, lsl #12
+; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
   %1 = add i64 %a, 16384
   %2 = inttoptr i64 %1 to i32*
   %3 = load i32* %2
@@ -235,9 +238,12 @@
 
 ; Min un-supported scaled offset
 define void @store_breg_immoff_6(i64 %a) {
-; CHECK-LABEL: store_breg_immoff_6
-; CHECK: add [[REG:x[0-9]+]], x0, #4, lsl #12
-; CHECK-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
+; SDAG-LABEL: store_breg_immoff_6
+; SDAG: orr w[[NUM:[0-9]+]], wzr, #0x4000
+; SDAG-NEXT: str wzr, [x0, x[[NUM]]]
+; FAST-LABEL: store_breg_immoff_6
+; FAST: add [[REG:x[0-9]+]], x0, #4, lsl #12
+; FAST-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
   %1 = add i64 %a, 16384
   %2 = inttoptr i64 %1 to i32*
   store i32 0, i32* %2
@@ -298,8 +304,8 @@
 define i64 @load_breg_offreg_immoff_2(i64 %a, i64 %b) {
 ; SDAG-LABEL: load_breg_offreg_immoff_2
 ; SDAG: add [[REG1:x[0-9]+]], x0, x1
-; SDAG-NEXT: add [[REG2:x[0-9]+]], [[REG1]], #15, lsl #12
-; SDAG-NEXT: ldr x0, {{\[}}[[REG2]]{{\]}}
+; SDAG-NEXT: orr w[[NUM:[0-9]+]], wzr, #0xf000
+; SDAG-NEXT: ldr x0, {{\[}}[[REG1]], x[[NUM]]]
 ; FAST-LABEL: load_breg_offreg_immoff_2
 ; FAST: add [[REG:x[0-9]+]], x0, #15, lsl #12
 ; FAST-NEXT: ldr x0, {{\[}}[[REG]], x1{{\]}}
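
A note for reviewers: the interaction between the three checks (load/store immediate range, preferred ADD/SUB, and the MOVZ carve-out) is easy to misjudge by eye, so below is a minimal standalone C++ sketch for exercising the heuristic outside the backend. The isPreferredADD body and its masks are copied from the patch; foldsToRegisterOffset, the __builtin_ctz stand-in for llvm::Log2_32, and the main() driver are illustrative assumptions, not part of the change.

#include <cstdint>
#include <cstdio>

// Copied from the patch: an immediate is "preferred by ADD" if it fits a
// plain ADD (bits [0,11]), or an "ADD ..., LSL #12" (bits [12,23]) that a
// single MOVZ could not materialize anyway.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in a plain ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Only bits [12,23] set: encodable as "ADD ..., LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // Reject constants a single MOVZ can materialize: bits only in [16,23]
    // (MOVZ ..., LSL #16) or only in [12,15] (plain 16-bit MOVZ).
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}

// Hypothetical wrapper mirroring the bail-out test added to
// SelectAddrModeXRO: return true when the wide-immediate fold should fire,
// i.e. the offset neither fits [BaseReg, #imm] addressing nor is a single
// ADD/SUB away. Size is the access size in bytes.
static bool foldsToRegisterOffset(int64_t ImmOff, unsigned Size) {
  unsigned Scale = __builtin_ctz(Size); // stand-in for llvm::Log2_32(Size)
  if ((ImmOff % (int64_t)Size == 0 && ImmOff >= 0 &&
       ImmOff < ((int64_t)0x1000 << Scale)) ||
      isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
    return false; // register-immediate mode or a single ADD/SUB wins
  return true;    // materialize with MOVZ/MOVN/ORR, use [BaseReg, XReg]
}

int main() {
  // Offsets taken from the new arm64-addrmode.ll tests, for an 8-byte load.
  const int64_t Offsets[] = {
      65535,   // t7:  0xffff   -> folds (orr + ldr [x0, xN])
      4095,    // t12: 0xfff    -> plain ADD preferred, no fold
      1191936, // t14: 0x123000 -> "ADD ..., LSL #12" preferred, no fold
      28672,   // t16: 0x7000   -> fits scaled [BaseReg, #imm], no fold
      32768,   // t4:  0x8000   -> single MOVZ/ORR beats ADD LSL #12, folds
  };
  for (int64_t Imm : Offsets)
    printf("%#8llx -> %s\n", (unsigned long long)Imm,
           foldsToRegisterOffset(Imm, 8) ? "fold to [BaseReg, XReg]"
                                         : "keep immediate form");
  return 0;
}

Built with clang++ -std=c++11, this should report a fold exactly for the t4- and t7-style offsets and the immediate form for the t12/t14/t16 boundaries, matching the CHECK lines above.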