diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1972,6 +1972,12 @@
     Known = KnownBits::ashr(Known, Known2);
     break;
   }
+  case AArch64ISD::MOVI: {
+    ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(0));
+    Known =
+        KnownBits::makeConstant(APInt(Known.getBitWidth(), CN->getZExtValue()));
+    break;
+  }
   case AArch64ISD::LOADgot:
   case AArch64ISD::ADDlow: {
     if (!Subtarget->isTargetILP32())
@@ -23114,6 +23120,7 @@
 bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
   return Op.getOpcode() == AArch64ISD::DUP ||
+         Op.getOpcode() == AArch64ISD::MOVI ||
          (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
           Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
          TargetLowering::isTargetCanonicalConstantNode(Op);
 }
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll b/llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
--- a/llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
@@ -86,7 +86,6 @@
 ; CHECK-LABEL: icmp_constfold_v16i8:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    movi.16b v0, #1
-; CHECK-NEXT:    and.16b v0, v0, v0
 ; CHECK-NEXT:    ret
   %1 = icmp eq <16 x i8> %a, %a
   br label %bb2
diff --git a/llvm/test/CodeGen/AArch64/shift-accumulate.ll b/llvm/test/CodeGen/AArch64/shift-accumulate.ll
--- a/llvm/test/CodeGen/AArch64/shift-accumulate.ll
+++ b/llvm/test/CodeGen/AArch64/shift-accumulate.ll
@@ -120,3 +120,57 @@
   %6 = or <2 x i64> %4, %5
   ret <2 x i64> %6
 }
+
+; Expected to be able to deduce that movi generates a constant integer vector
+; and turn USHR+ORR into USRA.
+define <8 x i16> @usra_with_movi_v8i16(<16 x i8> %0, <16 x i8> %1) {
+; CHECK-LABEL: usra_with_movi_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.16b, #1
+; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    usra v0.8h, v0.8h, #7
+; CHECK-NEXT:    ret
+  %3 = icmp eq <16 x i8> %0, %1
+  %4 = zext <16 x i1> %3 to <16 x i8>
+  %5 = bitcast <16 x i8> %4 to <8 x i16>
+  %6 = lshr <8 x i16> %5, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %7 = or <8 x i16> %6, %5
+  ret <8 x i16> %7
+}
+
+; Expected to be able to deduce that movi generates a constant integer vector
+; and turn USHR+ORR into USRA.
+define <4 x i32> @usra_with_movi_v4i32(<16 x i8> %0, <16 x i8> %1) {
+; CHECK-LABEL: usra_with_movi_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.16b, #1
+; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    usra v0.4s, v0.4s, #15
+; CHECK-NEXT:    ret
+  %3 = icmp eq <16 x i8> %0, %1
+  %4 = zext <16 x i1> %3 to <16 x i8>
+  %5 = bitcast <16 x i8> %4 to <4 x i32>
+  %6 = lshr <4 x i32> %5, <i32 15, i32 15, i32 15, i32 15>
+  %7 = or <4 x i32> %6, %5
+  ret <4 x i32> %7
+}
+
+; Expected to be able to deduce that movi generates a constant integer vector
+; and turn USHR+ORR into USRA.
+define <2 x i64> @usra_with_movi_v2i64(<16 x i8> %0, <16 x i8> %1) {
+; CHECK-LABEL: usra_with_movi_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.16b, #1
+; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    usra v0.2d, v0.2d, #31
+; CHECK-NEXT:    ret
+  %3 = icmp eq <16 x i8> %0, %1
+  %4 = zext <16 x i1> %3 to <16 x i8>
+  %5 = bitcast <16 x i8> %4 to <2 x i64>
+  %6 = lshr <2 x i64> %5, <i64 31, i64 31>
+  %7 = or <2 x i64> %6, %5
+  ret <2 x i64> %7
+}
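
Note (not part of the patch): a minimal standalone sketch of what the new MOVI case reports through KnownBits::makeConstant, under the assumption that it is built against LLVM's Support headers. The immediate value 1 here simply mirrors the `movi v2.16b, #1` in the new tests; once the immediate is fully known, the generic combiner can prove the ORR operands share no set bits, which is what lets USHR+ORR become USRA.

// Minimal sketch, assuming an LLVM checkout to compile/link against.
// It only demonstrates the KnownBits API used by the patch, nothing more.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // The added computeKnownBitsForTargetNode case does essentially this with
  // the MOVI node's immediate operand (here 1, as in "movi v2.16b, #1").
  unsigned BitWidth = 8;
  uint64_t Imm = 1; // hypothetical immediate for illustration
  KnownBits Known = KnownBits::makeConstant(APInt(BitWidth, Imm));

  // Every bit is now known: One holds the set bits, Zero the clear bits.
  outs() << "One  = 0x";
  outs().write_hex(Known.One.getZExtValue());
  outs() << "\nZero = 0x";
  outs().write_hex(Known.Zero.getZExtValue());
  outs() << "\nall bits known: " << (Known.isConstant() ? "yes" : "no") << "\n";
  return 0;
}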