diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -535,6 +535,11 @@
                                      const SelectionDAG &DAG,
                                      unsigned Depth = 0) const override;
 
+  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+                                           const APInt &DemandedElts,
+                                           const SelectionDAG &DAG,
+                                           unsigned Depth) const override;
+
   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
     // Returning i64 unconditionally here (i.e. even for ILP32) means that the
     // *DAG* representation of pointers will always be 64-bits. They will be
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2141,6 +2141,38 @@
   }
 }
+unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
+    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+    unsigned Depth) const {
+  EVT VT = Op.getValueType();
+  unsigned VTBits = VT.getScalarSizeInBits();
+  unsigned Opcode = Op.getOpcode();
+  switch (Opcode) {
+  case AArch64ISD::CMEQ:
+  case AArch64ISD::CMGE:
+  case AArch64ISD::CMGT:
+  case AArch64ISD::CMHI:
+  case AArch64ISD::CMHS:
+  case AArch64ISD::FCMEQ:
+  case AArch64ISD::FCMGE:
+  case AArch64ISD::FCMGT:
+  case AArch64ISD::CMEQz:
+  case AArch64ISD::CMGEz:
+  case AArch64ISD::CMGTz:
+  case AArch64ISD::CMLEz:
+  case AArch64ISD::CMLTz:
+  case AArch64ISD::FCMEQz:
+  case AArch64ISD::FCMGEz:
+  case AArch64ISD::FCMGTz:
+  case AArch64ISD::FCMLEz:
+  case AArch64ISD::FCMLTz:
+    // Compares return either 0 or all-ones
+    return VTBits;
+  }
+
+  return 1;
+}
+
 MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                                   EVT) const {
   return MVT::i64;
 }
@@ -19478,6 +19510,13 @@
   unsigned ShiftImm = N->getConstantOperandVal(1);
   assert(OpScalarSize > ShiftImm && "Invalid shift imm");
 
+  // Remove sign_extend_inreg (ashr(shl(x)) based on the number of sign bits.
+  if (N->getOpcode() == AArch64ISD::VASHR &&
+      Op.getOpcode() == AArch64ISD::VSHL &&
+      N->getOperand(1) == Op.getOperand(1))
+    if (DCI.DAG.ComputeNumSignBits(Op.getOperand(0)) > ShiftImm)
+      return Op.getOperand(0);
+
   APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
   APInt DemandedMask = ~ShiftedOutBits;
 
diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
--- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -71,7 +71,7 @@
 define i64 @not_sign_i64(i64 %a) {
 ; CHECK-LABEL: not_sign_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    cmp x0, #0
 ; CHECK-NEXT:    cneg x0, x8, le
 ; CHECK-NEXT:    ret
@@ -104,7 +104,7 @@
 define i64 @not_sign_i64_4(i64 %a) {
 ; CHECK-LABEL: not_sign_i64_4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #-1
+; CHECK-NEXT:    mov x0, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    ret
   %c = icmp ugt i64 %a, -1
   %res = select i1 %c, i64 1, i64 -1
@@ -229,10 +229,10 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    adrp x8, .LCPI18_0
-; CHECK-NEXT:    movi v2.4s, #1
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
 ; CHECK-NEXT:    cmgt v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    bic v1.16b, v2.16b, v0.16b
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %c = icmp sgt <4 x i32> %a,
   %res = select <4 x i1> %c, <4 x i32> , <4 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -580,8 +580,6 @@
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
 ; CHECK-NEXT:    mov w8, v1.s[1]
 ; CHECK-NEXT:    mov v0.h[1], w8
-; CHECK-NEXT:    shl v0.4h, v0.4h, #15
-; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
@@ -536,20 +536,18 @@
 ; CHECK-NEXT:    ldr s1, [x0]
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ldr q3, [x1]
 ; CHECK-NEXT:    fcmeq v2.4h, v1.4h, #0.0
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    sshll v2.4s, v2.4h, #0
 ; CHECK-NEXT:    mov v0.h[0], v2.h[0]
 ; CHECK-NEXT:    mov w8, v2.s[1]
-; CHECK-NEXT:    ldr q2, [x1]
 ; CHECK-NEXT:    mov v0.h[1], w8
-; CHECK-NEXT:    shl v0.4h, v0.4h, #15
-; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    uunpklo z0.d, z1.s
-; CHECK-NEXT:    st1h { z0.d }, p0, [z2.d]
+; CHECK-NEXT:    st1h { z0.d }, p0, [z3.d]
 ; CHECK-NEXT:    ret
   %vals = load <2 x half>, ptr %a
   %ptrs = load <2 x ptr>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
@@ -21,8 +21,6 @@
 ; CHECK-NEXT:    mov v0.h[0], v2.h[0]
 ; CHECK-NEXT:    mov w8, v2.s[1]
 ; CHECK-NEXT:    mov v0.h[1], w8
-; CHECK-NEXT:    shl v0.4h, v0.4h, #15
-; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    st1h { z1.h }, p0, [x1]
 ; CHECK-NEXT:    ret
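
Note on the test changes: every lane produced by a NEON compare is either all-zeros or all-ones, so all of its bits equal the sign bit. The new ComputeNumSignBitsForTargetNode hook reports the full lane width for those nodes, which is what lets the VASHR-of-VSHL combine above drop the shl #15 / cmlt #0 (sign_extend_inreg) pairs from the SVE fixed-length tests. The following is a standalone C++ sketch of that argument, not LLVM API code; the 16-bit lane width, the shift amount of 15, and the helper names numSignBits and signExtendInReg are illustrative assumptions taken from the v0.4h test diffs.

#include <cassert>
#include <cstdint>

// Sign bits of a 16-bit lane, counted the way ComputeNumSignBits defines them:
// the sign bit plus every bit below it that matches the sign bit, scanning
// from the top. An all-zeros or all-ones lane therefore reports 16.
static unsigned numSignBits(int16_t V) {
  const uint16_t U = static_cast<uint16_t>(V);
  const unsigned Sign = (U >> 15) & 1u;
  unsigned N = 1;
  while (N < 16 && ((U >> (15 - N)) & 1u) == Sign)
    ++N;
  return N;
}

// Models the VSHL #Shift / VASHR #Shift pair on one lane: sign-extend the low
// (16 - Shift) bits into the full 16-bit lane. Relies on two's-complement
// narrowing and arithmetic right shift (guaranteed since C++20, universal in
// practice).
static int16_t signExtendInReg(int16_t V, unsigned Shift) {
  const int16_t Shifted = static_cast<int16_t>(static_cast<uint16_t>(V) << Shift);
  return static_cast<int16_t>(Shifted >> Shift);
}

int main() {
  // A compare lane is either 0 or all-ones, so it carries 16 sign bits...
  const int16_t CompareLanes[] = {0, -1};
  for (int16_t Lane : CompareLanes) {
    assert(numSignBits(Lane) == 16);
    // ...which is greater than the shift amount of 15, so the shl/ashr pair
    // returns the lane unchanged and can be removed.
    assert(signExtendInReg(Lane, 15) == Lane);
  }
  // A lane with only 15 sign bits would fail the > ShiftImm guard, and the
  // pair really does change it, so the combine must not fire there.
  assert(numSignBits(1) == 15);
  assert(signExtendInReg(1, 15) == -1);
  return 0;
}

The last two assertions mirror the guard in the combine, ComputeNumSignBits(Op.getOperand(0)) > ShiftImm: the fold is only an identity when the value already has more sign bits than the shift pair would recreate.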