Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -310,6 +310,17 @@
 
 //===----------------------------------------------------------------------===//
 
+// Any instruction that defines a 32-bit result leaves the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. But any other 32-bit operation will zero-extend
+// up to 64 bits.
+// FIXME: X86 also checks for CMOV here. Do we need something similar?
+def def32 : PatLeaf<(i32 GPR32:$src), [{
+  return N->getOpcode() != ISD::TRUNCATE &&
+         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+         N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Miscellaneous instructions.
 //===----------------------------------------------------------------------===//
@@ -645,6 +656,13 @@
 def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64:$R3),
           (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64:$R3)>;
 }
+// Avoid using SUBSXrx for a 32->64 zext if we can get it for free, since
+// its src regclass is GPR64sp, which prevents the folding of movs of
+// XZR.
+let AddedComplexity = 10 in {
+def : Pat<(sub GPR64:$Rn, (zext def32:$src)),
+          (SUBSXrr GPR64:$Rn, (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32))>;
+}
 
 // Because of the immediate format for add/sub-imm instructions, the
 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
@@ -5301,17 +5319,6 @@
 //----------------------------------------------------------------------------
 // FIXME: Like for X86, these should go in their own separate .td file.
 
-// Any instruction that defines a 32-bit result leaves the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. But any other 32-bit operation will zero-extend
-// up to 64 bits.
-// FIXME: X86 also checks for CMOV here. Do we need something similar?
-def def32 : PatLeaf<(i32 GPR32:$src), [{
-  return N->getOpcode() != ISD::TRUNCATE &&
-         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
-         N->getOpcode() != ISD::CopyFromReg;
-}]>;
-
 // In the case of a 32-bit def that is known to implicitly zero-extend,
 // we can use a SUBREG_TO_REG.
 def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
Index: test/CodeGen/AArch64/addsub_ext.ll
===================================================================
--- test/CodeGen/AArch64/addsub_ext.ll
+++ test/CodeGen/AArch64/addsub_ext.ll
@@ -304,19 +304,20 @@
   ret void
 }
 
-define void @sub_i32rhs() minsize {
+define void @sub_i32rhs(i32 %in32) minsize {
 ; CHECK-LABEL: sub_i32rhs:
   %val32_tmp = load i32, i32* @var32
   %lhs64 = load i64, i64* @var64
 
   %val32 = add i32 %val32_tmp, 123
-  %rhs64_zext = zext i32 %val32 to i64
+  %rhs64_zext = zext i32 %in32 to i64
   %res64_zext = sub i64 %lhs64, %rhs64_zext
   store volatile i64 %res64_zext, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
 
-  %rhs64_zext_shift = shl i64 %rhs64_zext, 2
+  %rhs64_zext2 = zext i32 %val32 to i64
+  %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
   %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
   store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
Index: test/CodeGen/AArch64/neg-zext.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/neg-zext.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; ISEL used to prefer to match the sub as SUBSXrx, which prevented
+; folding of mov WZR to form a neg instruction.
+
+define i64 @test1(i32 %x) {
+; CHECK-LABEL: test1:
+entry:
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+; CHECK-NEXT: neg x0, x[[TMP]]
+; CHECK-NEXT: ret
+  %ret = sub i64 0, %ext
+  ret i64 %ret
+}
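
Note, not part of the patch: the new AddedComplexity pattern fires when the right-hand side of a 64-bit sub is a zero-extend of a 32-bit value that def32 already proves has a clear high half, so the extension can be expressed as a free SUBREG_TO_REG and the sub can use the register-register form (whose GPR64 operand class includes XZR, unlike the GPR64sp class required by SUBSXrx). A minimal, hypothetical IR example of that shape is sketched below; the function name and mask are illustrative, and the instruction choices in the comments describe the pattern's intent rather than verified llc output.

; Hypothetical sketch, not taken from the patch or the test suite.
define i64 @sub_of_zext(i64 %a, i32 %b) {
entry:
  %m = and i32 %b, 65535      ; 32-bit def other than a truncate or copy, so def32 matches
  %ext = zext i32 %m to i64   ; intended to fold to a free SUBREG_TO_REG
  %r = sub i64 %a, %ext       ; intended to select SUBSXrr rather than SUBSXrx
  ret i64 %r
}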