diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td
--- a/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -253,8 +253,14 @@
 def int_hexagon_V6_vrmpyub_rtt_acc_128B :
 Hexagon_v64i32_v64i32v32i32i64_rtt_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">;
 
-
+// HVX Vector predicate casts
 //
+def int_hexagon_V6_pred_typecast :
+Hexagon_NonGCC_Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+
+def int_hexagon_V6_pred_typecast_128B :
+Hexagon_NonGCC_Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+
 // Masked vector stores
 //
 
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -406,6 +406,7 @@
 
   bool isHvxSingleTy(MVT Ty) const;
   bool isHvxPairTy(MVT Ty) const;
+  bool isHvxBoolTy(MVT Ty) const;
   SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
                              SelectionDAG &DAG) const;
   SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const;
@@ -454,6 +455,7 @@
   SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3055,7 +3055,7 @@
     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
     case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
-    case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+    case ISD::VACOPY:               return LowerVACOPY(Op, DAG);
     case ISD::VASTART:              return LowerVASTART(Op, DAG);
     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
     case ISD::SETCC:                return LowerSETCC(Op, DAG);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -9,6 +9,7 @@
 #include "HexagonISelLowering.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
 #include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
@@ -64,8 +65,9 @@
     AddPromotedToType(Opc, FromTy, ToTy);
   };
 
-  setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
-  setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
+  setOperationAction(ISD::VECTOR_SHUFFLE,     ByteV,      Legal);
+  setOperationAction(ISD::VECTOR_SHUFFLE,     ByteW,      Legal);
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
   for (MVT T : LegalV) {
     setIndexedLoadAction(ISD::POST_INC, T, Legal);
@@ -192,12 +194,13 @@
     setOperationAction(ISD::XOR, BoolV, Legal);
   }
 
-  if (Use64b)
+  if (Use64b) {
     for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
-  else
+  } else {
     for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
+  }
 
   setTargetDAGCombine(ISD::VSELECT);
 }
@@ -281,6 +284,14 @@
          Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
 }
 
+// Return true if Ty is accepted by Subtarget.isHVXVectorType (called with
+// its second argument set to true) and its vector element type is i1.
+bool
+HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
+  return Subtarget.isHVXVectorType(Ty, true) &&
+         Ty.getVectorElementType() == MVT::i1;
+}
+
 SDValue
 HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
                                           SelectionDAG &DAG) const {
@@ -1443,6 +1454,35 @@
   return Op;
 }
 
+// Custom lowering for INTRINSIC_WO_CHAIN nodes. Only the predicate typecast
+// intrinsic selected by Subtarget.useHVX64BOps() is rewritten: when both the
+// result and the operand are HVX bool vectors, the node is replaced by the
+// operand itself (identical types) or by a HexagonISD::TYPECAST node. Every
+// other node is returned unchanged.
+SDValue
+HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
+  const SDLoc &dl(Op);
+  MVT ResTy = ty(Op);
+
+  // Operand 0 is read as the intrinsic ID; operand 1 is the value being cast.
+  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  bool Use64b = Subtarget.useHVX64BOps();
+  // Only the variant that matches the active HVX mode is recognized here.
+  unsigned IntPredCast = Use64b ? Intrinsic::hexagon_V6_pred_typecast
+                                : Intrinsic::hexagon_V6_pred_typecast_128B;
+  if (IntNo == IntPredCast) {
+    SDValue Vs = Op.getOperand(1);
+    MVT OpTy = ty(Vs);
+    if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) {
+      if (ResTy == OpTy)
+        return Vs;
+      return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs);
+    }
+  }
+
+  return Op;
+}
+
 SDValue
 HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
   assert(!Op.isMachineOpcode());
@@ -1578,6 +1618,7 @@
     case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
     case ISD::SETCC:
     case ISD::INTRINSIC_VOID:          return Op;
+    case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
     // Unaligned loads will be handled by the default lowering.
     case ISD::LOAD:                    return SDValue();
   }
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-predicate-typecast.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-predicate-typecast.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-predicate-typecast.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that this compiles successfully.
+
+declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
+declare <32 x i32> @llvm.hexagon.V6.vandqrt.128B(<128 x i1>, i32)
+
+; The overloaded intrinsic @llvm.hexagon.V6.pred.typecast.128B changes
+; the type of the vector predicate. Each intended application needs to be
+; declared individually, and they are distinguished by unique suffixes.
+; These suffixes don't mean anything.
+
+declare <32 x i1> @llvm.hexagon.V6.pred.typecast.128B.s1(<128 x i1>)
+declare <128 x i1> @llvm.hexagon.V6.pred.typecast.128B.s2(<32 x i1>)
+
+; CHECK-LABEL: fred:
+
+; CHECK: r[[R0:[0-9]+]] = #-1
+; CHECK: q[[Q0:[0-9]+]] = vand(v0,r[[R0]])
+; CHECK: v0 = vand(q[[Q0]],r[[R0]])
+
+define <32 x i32> @fred(<32 x i32> %a0) #0 {
+  %q0 = call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
+  %q1 = call <32 x i1> @llvm.hexagon.V6.pred.typecast.128B.s1(<128 x i1> %q0)
+  %q2 = call <128 x i1> @llvm.hexagon.V6.pred.typecast.128B.s2(<32 x i1> %q1)
+  %v0 = call <32 x i32> @llvm.hexagon.V6.vandqrt.128B(<128 x i1> %q2, i32 -1)
+  ret <32 x i32> %v0
+}
+
+attributes #0 = { readnone nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length128b" }
+