diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td
--- a/llvm/lib/Target/VE/VECallingConv.td
+++ b/llvm/lib/Target/VE/VECallingConv.td
@@ -103,14 +103,7 @@ // handled conforming to the standard cc.
 def CC_VE_Fast : CallingConv<[
   // vector --> generic vector registers
-  CCIfType<[v2i32, v2i64, v2f32, v2f64,
-            v4i32, v4i64, v4f32, v4f64,
-            v8i32, v8i64, v8f32, v8f64,
-            v16i32, v16i64, v16f32, v16f64,
-            v32i32, v32i64, v32f32, v32f64,
-            v64i32, v64i64, v64f32, v64f64,
-            v128i32, v128i64, v128f32, v128f64,
-            v256i32, v256f32, v256i64, v256f64],
+  CCIfType<[v256i32, v256f32, v256i64, v256f64],
            CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>,
 
   // TODO: make this conditional on packed mode
   CCIfType<[v512i32, v512f32],
@@ -131,14 +124,7 @@
 def RetCC_VE_Fast : CallingConv<[
   // vector --> generic vector registers
-  CCIfType<[v2i32, v2i64, v2f32, v2f64,
-            v4i32, v4i64, v4f32, v4f64,
-            v8i32, v8i64, v8f32, v8f64,
-            v16i32, v16i64, v16f32, v16f64,
-            v32i32, v32i64, v32f32, v32f64,
-            v64i32, v64i64, v64f32, v64f64,
-            v128i32, v128i64, v128f32, v128f64,
-            v256i32, v256f32, v256i64, v256f64],
+  CCIfType<[v256i32, v256f32, v256i64, v256f64],
            CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>,
 
   // TODO: make this conditional on packed mode
   CCIfType<[v512i32, v512f32],
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -34,6 +34,8 @@
   MEMBARRIER, // Compiler barrier only; generate a no-op.
 
+  VEC_BROADCAST, // 0: scalar value, 1: VL
+
   CALL,            // A call instruction.
   RET_FLAG,        // Return with a flag operand.
   GLOBAL_BASE_REG, // Global base reg for PIC.
@@ -114,6 +116,8 @@
   SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   /// } Custom Lower
 
   /// Custom DAGCombine {
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -70,6 +70,11 @@
   return CCInfo.CheckReturn(Outs, RetCC);
 }
 
+static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
+                                   MVT::v256f32, MVT::v512f32, MVT::v256f64};
+
+static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
+
 void VETargetLowering::initRegisterClasses() {
   // Set up the register classes.
   addRegisterClass(MVT::i32, &VE::I32RegClass);
@@ -79,46 +84,10 @@
   addRegisterClass(MVT::f128, &VE::F128RegClass);
 
   if (Subtarget->enableVPU()) {
-    addRegisterClass(MVT::v2i32, &VE::V64RegClass);
-    addRegisterClass(MVT::v4i32, &VE::V64RegClass);
-    addRegisterClass(MVT::v8i32, &VE::V64RegClass);
-    addRegisterClass(MVT::v16i32, &VE::V64RegClass);
-    addRegisterClass(MVT::v32i32, &VE::V64RegClass);
-    addRegisterClass(MVT::v64i32, &VE::V64RegClass);
-    addRegisterClass(MVT::v128i32, &VE::V64RegClass);
-    addRegisterClass(MVT::v256i32, &VE::V64RegClass);
-    addRegisterClass(MVT::v512i32, &VE::V64RegClass);
-
-    addRegisterClass(MVT::v2i64, &VE::V64RegClass);
-    addRegisterClass(MVT::v4i64, &VE::V64RegClass);
-    addRegisterClass(MVT::v8i64, &VE::V64RegClass);
-    addRegisterClass(MVT::v16i64, &VE::V64RegClass);
-    addRegisterClass(MVT::v32i64, &VE::V64RegClass);
-    addRegisterClass(MVT::v64i64, &VE::V64RegClass);
-    addRegisterClass(MVT::v128i64, &VE::V64RegClass);
-    addRegisterClass(MVT::v256i64, &VE::V64RegClass);
-
-    addRegisterClass(MVT::v2f32, &VE::V64RegClass);
-    addRegisterClass(MVT::v4f32, &VE::V64RegClass);
-    addRegisterClass(MVT::v8f32, &VE::V64RegClass);
-    addRegisterClass(MVT::v16f32, &VE::V64RegClass);
-    addRegisterClass(MVT::v32f32, &VE::V64RegClass);
-    addRegisterClass(MVT::v64f32, &VE::V64RegClass);
-    addRegisterClass(MVT::v128f32, &VE::V64RegClass);
-    addRegisterClass(MVT::v256f32, &VE::V64RegClass);
-    addRegisterClass(MVT::v512f32, &VE::V64RegClass);
-
-    addRegisterClass(MVT::v2f64, &VE::V64RegClass);
-    addRegisterClass(MVT::v4f64, &VE::V64RegClass);
-    addRegisterClass(MVT::v8f64, &VE::V64RegClass);
-    addRegisterClass(MVT::v16f64, &VE::V64RegClass);
-    addRegisterClass(MVT::v32f64, &VE::V64RegClass);
-    addRegisterClass(MVT::v64f64, &VE::V64RegClass);
-    addRegisterClass(MVT::v128f64, &VE::V64RegClass);
-    addRegisterClass(MVT::v256f64, &VE::V64RegClass);
-
+    for (MVT VecVT : AllVectorVTs)
+      addRegisterClass(VecVT, &VE::V64RegClass);
     addRegisterClass(MVT::v256i1, &VE::VMRegClass);
     addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
   }
 }
@@ -285,7 +254,8 @@
 }
 
 void VETargetLowering::initVPUActions() {
-  // TODO upstream vector isel
+  for (MVT LegalVecVT : AllVectorVTs)
+    setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
 }
 
 SDValue
@@ -898,6 +868,7 @@
   TARGET_NODE_CASE(GETTLSADDR)
   TARGET_NODE_CASE(MEMBARRIER)
   TARGET_NODE_CASE(CALL)
+  TARGET_NODE_CASE(VEC_BROADCAST)
   TARGET_NODE_CASE(RET_FLAG)
   TARGET_NODE_CASE(GLOBAL_BASE_REG)
 }
@@ -1403,6 +1374,32 @@
   return DAG.getMergeValues(Ops, DL);
 }
 
+static SDValue getSplatValue(SDNode *N) {
+  if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
+    return BuildVec->getSplatValue();
+  }
+  return SDValue();
+}
+
+SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  unsigned NumEls = Op.getValueType().getVectorNumElements();
+  MVT ElemVT = Op.getSimpleValueType().getVectorElementType();
+
+  if (SDValue ScalarV = getSplatValue(Op.getNode())) {
+    // lower to VEC_BROADCAST
+    MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
+
+    auto AVL = DAG.getConstant(NumEls, DL, MVT::i32);
+    return DAG.getNode(VEISD::VEC_BROADCAST, DL, LegalResVT, Op.getOperand(0),
+                       AVL);
+  }
+
+  // Expand
+  return SDValue();
+}
+
 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default:
@@ -1423,6 +1420,8 @@
     return lowerJumpTable(Op, DAG);
   case ISD::LOAD:
     return lowerLOAD(Op, DAG);
+  case ISD::BUILD_VECTOR:
+    return lowerBUILD_VECTOR(Op, DAG);
   case ISD::STORE:
     return lowerSTORE(Op, DAG);
   case ISD::VASTART:
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -2224,3 +2224,6 @@
 
 // The vevlintrin
 include "VEInstrIntrinsicVL.td"
+
+// Patterns and intermediate SD nodes (VEC_*).
+include "VEInstrPatternsVec.td"
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -0,0 +1,44 @@
+//===-- VEInstrPatternsVec.td - VEC_-type SDNodes and isel for VE Target --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the VEC_* prefixed intermediate SDNodes and their
+// isel patterns.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+// Custom intermediate ISDs.
+class IsVLVT<int OpIdx> : SDTCisVT<OpIdx, i32>;
+def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST",
+                           SDTypeProfile<1, 2, [SDTCisVec<0>, IsVLVT<2>]>>;
+
+multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp,
+                       SDNodeXForm ImmCast, SubRegIndex SubRegIdx> {
+  // VBRDil
+  def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)),
+            (VBRDil (ImmCast $sy), i32:$vl)>;
+
+  // VBRDrl
+  def : Pat<(v32 (vec_broadcast s32:$sy, i32:$vl)),
+            (VBRDrl
+               (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, SubRegIdx),
+               i32:$vl)>;
+}
+
+defm : vbrd_elem32<v256i32, i32, simm7, LO7, sub_i32>;
+defm : vbrd_elem32<v256f32, f32, simm7fp, LO7FP, sub_f32>;
+
+multiclass vbrd_elem64<ValueType v64, ValueType s64> {
+  // VBRDrl
+  def : Pat<(v64 (vec_broadcast s64:$sy, i32:$vl)),
+            (VBRDrl s64:$sy, i32:$vl)>;
+}
+
+defm : vbrd_elem64<v256i64, i64>;
+defm : vbrd_elem64<v256f64, f64>;
diff --git a/llvm/lib/Target/VE/VERegisterInfo.td b/llvm/lib/Target/VE/VERegisterInfo.td
--- a/llvm/lib/Target/VE/VERegisterInfo.td
+++ b/llvm/lib/Target/VE/VERegisterInfo.td
@@ -185,14 +185,7 @@
 def V64 : RegisterClass<"VE",
                         [v256f64, // default type for vector registers
                          v512i32, v512f32,
-                         v256i64, v256i32, v256f32, /* v256f64, */
-                         v128i64, v128i32, v128f32, v128f64,
-                         v64i64, v64i32, v64f32, v64f64,
-                         v32i64, v32i32, v32f32, v32f64,
-                         v16i64, v16i32, v16f32, v16f64,
-                         v8i64, v8i32, v8f32, v8f64,
-                         v4i64, v4i32, v4f32, v4f64,
-                         v2i64, v2i32, v2f32, v2f64], 64,
+                         v256i64, v256i32, v256f32, /* v256f64, */], 64,
                         (add (sequence "V%u", 0, 63), VIX)>;
diff --git a/llvm/test/CodeGen/VE/Vector/vec_broadcast.ll b/llvm/test/CodeGen/VE/Vector/vec_broadcast.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vec_broadcast.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+define fastcc <256 x i64> @brd_v256i64(i64 %s) {
+; CHECK-LABEL: brd_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v0, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x i64> undef, i64 %s, i32 0
+  %ret = shufflevector <256 x i64> %val, <256 x i64> undef, <256 x i32> zeroinitializer
+  ret <256 x i64> %ret
+}
+
+define fastcc <256 x double> @brd_v256f64(double %s) {
+; CHECK-LABEL: brd_v256f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v0, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x double> undef, double %s, i32 0
+  %ret = shufflevector <256 x double> %val, <256 x double> undef, <256 x i32> zeroinitializer
+  ret <256 x double> %ret
+}
+
+define fastcc <256 x i32> @brd_v256i32(i32 %s) {
+; CHECK-LABEL: brd_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v0, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x i32> undef, i32 %s, i32 0
+  %ret = shufflevector <256 x i32> %val, <256 x i32> undef, <256 x i32> zeroinitializer
+  ret <256 x i32> %ret
+}
+
+define fastcc <256 x i32> @brdi_v256i32(i32 %s) {
+; CHECK-LABEL: brdi_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vbrd %v0, 13
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x i32> undef, i32 13, i32 0
+  %ret = shufflevector <256 x i32> %val, <256 x i32> undef, <256 x i32> zeroinitializer
+  ret <256 x i32> %ret
+}
+
+define fastcc <256 x float> @brd_v256f32(float %s) {
+; CHECK-LABEL: brd_v256f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v0, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x float> undef, float %s, i32 0
+  %ret = shufflevector <256 x float> %val, <256 x float> undef, <256 x i32> zeroinitializer
+  ret <256 x float> %ret
+}
+
+define fastcc <256 x float> @brdi_v256f32(float %s) {
+; CHECK-LABEL: brdi_v256f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vbrd %v0, 0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x float> undef, float 0.e+00, i32 0
+  %ret = shufflevector <256 x float> %val, <256 x float> undef, <256 x i32> zeroinitializer
+  ret <256 x float> %ret
+}