diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -801,6 +801,7 @@ SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -887,6 +887,13 @@ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction(ISD::SDIV, VT, Custom); setOperationAction(ISD::UDIV, VT, Custom); + + // Custom lowering for SIGN_EXTEND etc. applies only to cases where the + // operand is a predicate. (All other cases involve illegal types, so + // type legalization will transform them.) 
+ setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); } } setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); @@ -991,10 +998,12 @@ addTypeForNEON(VT, MVT::v4i32); } -EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, - EVT VT) const { +EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, + LLVMContext &C, EVT VT) const { if (!VT.isVector()) return MVT::i32; + if (VT.isScalableVector()) + return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount()); return VT.changeVectorElementTypeToInteger(); } @@ -3406,6 +3415,10 @@ return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VSCALE: return LowerVSCALE(Op, DAG); + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + return LowerINT_EXTEND(Op, DAG); } } @@ -8931,6 +8944,27 @@ DL, VT); } +SDValue AArch64TargetLowering::LowerINT_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + // We only request custom lowering for SVE integer vectors. + // + // The operand of the EXTEND must be an i1 vector; otherwise, one of the + // types involved would be illegal. + EVT VT = Op.getValueType(); + assert(VT.isScalableVector() && "Expected SVE vector EXTEND"); + assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == 1 && + "Expected EXTEND from i1"); + + // Lower nxvNi1 extend to the equivalent VSELECT. + SDLoc DL(Op); + SDValue Condition = Op.getOperand(0); + SDValue True = Op.getOpcode() == ISD::SIGN_EXTEND + ? DAG.getAllOnesConstant(DL, VT) + : DAG.getConstant(1, DL, VT); + SDValue False = DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::VSELECT, DL, VT, Condition, True, False); +} + /// Set the IntrinsicInfo for the `aarch64_sve_st` intrinsics. 
template static bool setInfoSVEStN(AArch64TargetLowering::IntrinsicInfo &Info, diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll --- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll @@ -229,3 +229,31 @@ %y = fcmp ueq %x, %x2 ret %y } + +define @oeq_4f32_sext( %x, %x2) { +; CHECK-LABEL: oeq_4f32_sext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #0 // =0x0 +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: mov z0.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s +; CHECK-NEXT: ret + %y = fcmp oeq %x, %x2 + %r = sext %y to + ret %r +} + +define @oeq_4f32_zext( %x, %x2) { +; CHECK-LABEL: oeq_4f32_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #0 // =0x0 +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: mov z0.s, #1 // =0x1 +; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s +; CHECK-NEXT: ret + %y = fcmp oeq %x, %x2 + %r = zext %y to + ret %r +} diff --git a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll @@ -0,0 +1,206 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s + +define @sext_i1_i8( %a) { +; CHECK-LABEL: sext_i1_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z0.b, w8 +; CHECK-NEXT: mov z1.b, #0 // =0x0 +; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b +; CHECK-NEXT: ret + %r = sext %a to + ret %r +} + +define @sext_i1_i16( %a) { +; CHECK-LABEL: sext_i1_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: mov z1.h, #0 // =0x0 +; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h +; CHECK-NEXT: ret + %r = sext %a to + ret %r +} + +define @sext_i1_i32( %a) { +; CHECK-LABEL: sext_i1_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0 // 
=0x0 +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %r = sext %a to + ret %r +} + +define @sext_i1_i64( %a) { +; CHECK-LABEL: sext_i1_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0 // =0x0 +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %r = sext %a to + ret %r +} + +define @zext_i1_i8( %a) { +; CHECK-LABEL: zext_i1_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.b, #0 // =0x0 +; CHECK-NEXT: mov z1.b, #1 // =0x1 +; CHECK-NEXT: mov z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %r = zext %a to + ret %r +} + +define @zext_i1_i16( %a) { +; CHECK-LABEL: zext_i1_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: mov z1.h, #1 // =0x1 +; CHECK-NEXT: mov z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %r = zext %a to + ret %r +} + +define @zext_i1_i32( %a) { +; CHECK-LABEL: zext_i1_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: mov z1.s, #1 // =0x1 +; CHECK-NEXT: mov z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %r = zext %a to + ret %r +} + +define @zext_i1_i64( %a) { +; CHECK-LABEL: zext_i1_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0 // =0x0 +; CHECK-NEXT: mov z1.d, #1 // =0x1 +; CHECK-NEXT: mov z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %r = zext %a to + ret %r +} + +define @sext_i8_i16( %a) { +; CHECK-LABEL: sext_i8_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sxtb z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %r = sext %a to + ret %r +} + +define @sext_i8_i32( %a) { +; CHECK-LABEL: sext_i8_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxtb z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %r = sext %a to + ret %r +} + +define @sext_i8_i64( %a) { +; CHECK-LABEL: sext_i8_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtb z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %r = sext %a to + ret %r +} + +define @zext_i8_i16( %a) { +; CHECK-LABEL: zext_i8_i16: +; CHECK: // %bb.0: +; 
CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: ret + %r = zext %a to + ret %r +} + +define @zext_i8_i32( %a) { +; CHECK-LABEL: zext_i8_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: and z0.s, z0.s, #0xff +; CHECK-NEXT: ret + %r = zext %a to + ret %r +} + +define @zext_i8_i64( %a) { +; CHECK-LABEL: zext_i8_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: and z0.d, z0.d, #0xff +; CHECK-NEXT: ret + %r = zext %a to + ret %r +} + +define @sext_i16_i32( %a) { +; CHECK-LABEL: sext_i16_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %r = sext %a to + ret %r +} + +define @sext_i16_i64( %a) { +; CHECK-LABEL: sext_i16_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %r = sext %a to + ret %r +} + +define @zext_i16_i32( %a) { +; CHECK-LABEL: zext_i16_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: and z0.s, z0.s, #0xffff +; CHECK-NEXT: ret + %r = zext %a to + ret %r +} + +define @zext_i16_i64( %a) { +; CHECK-LABEL: zext_i16_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: and z0.d, z0.d, #0xffff +; CHECK-NEXT: ret + %r = zext %a to + ret %r +} + +define @sext_i32_i64( %a) { +; CHECK-LABEL: sext_i32_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %r = sext %a to + ret %r +} + +define @zext_i32_i64( %a) { +; CHECK-LABEL: zext_i32_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: ret + %r = zext %a to + ret %r +}