diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -1602,6 +1602,32 @@ } } +static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) { + if (!isa<BuildVectorSDNode>(N)) + return false; + const auto *BVN = cast<BuildVectorSDNode>(N); + + // Find first non-undef insertion. + unsigned Idx; + for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) { + auto ElemV = BVN->getOperand(Idx); + if (!ElemV->isUndef()) + break; + } + // Catch the (hypothetical) all-undef case. + if (Idx == BVN->getNumOperands()) + return false; + // Remember insertion. + UniqueIdx = Idx++; + // Verify that all other insertions are undef. + for (; Idx < BVN->getNumOperands(); ++Idx) { + auto ElemV = BVN->getOperand(Idx); + if (!ElemV->isUndef()) + return false; + } + return true; +} + static SDValue getSplatValue(SDNode *N) { if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) { return BuildVec->getSplatValue(); @@ -1615,6 +1641,17 @@ unsigned NumEls = Op.getValueType().getVectorNumElements(); MVT ElemVT = Op.getSimpleValueType().getVectorElementType(); + // If there is just one element, expand to INSERT_VECTOR_ELT. + unsigned UniqueIdx; + if (getUniqueInsertion(Op.getNode(), UniqueIdx)) { + SDValue AccuV = DAG.getUNDEF(Op.getValueType()); + auto ElemV = Op->getOperand(UniqueIdx); + SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV, + ElemV, IdxV); + } + + // Else emit a broadcast.
if (SDValue ScalarV = getSplatValue(Op.getNode())) { // lower to VEC_BROADCAST MVT LegalResVT = MVT::getVectorVT(ElemVT, 256); diff --git a/llvm/test/CodeGen/VE/Vector/expand_single_elem_build_vec.ll b/llvm/test/CodeGen/VE/Vector/expand_single_elem_build_vec.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/expand_single_elem_build_vec.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s + +; Function Attrs: norecurse nounwind readnone +; Check that a single-element insertion is lowered to an insert_vector_elt node for isel. +define fastcc <256 x i32> @expand_single_elem_build_vec(i32 %x, i32 %y) { +; CHECK-LABEL: expand_single_elem_build_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lsv %v0(42), %s0 +; CHECK-NEXT: b.l.t (, %s10) + %r = insertelement <256 x i32> undef, i32 %x, i32 42 + ret <256 x i32> %r +} diff --git a/llvm/test/CodeGen/VE/Vector/insert_elt.ll b/llvm/test/CodeGen/VE/Vector/insert_elt.ll --- a/llvm/test/CodeGen/VE/Vector/insert_elt.ll +++ b/llvm/test/CodeGen/VE/Vector/insert_elt.ll @@ -15,9 +15,7 @@ define fastcc <256 x i64> @insert_ri7_v256i64(i64 %s) { ; CHECK-LABEL: insert_ri7_v256i64: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v0, %s0 +; CHECK-NEXT: lsv %v0(127), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x i64> undef, i64 %s, i32 127 ret <256 x i64> %ret @@ -26,9 +24,8 @@ define fastcc <256 x i64> @insert_ri8_v256i64(i64 %s) { ; CHECK-LABEL: insert_ri8_v256i64: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v0, %s0 +; CHECK-NEXT: lea %s1, 128 +; CHECK-NEXT: lsv %v0(%s1), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x i64> undef, i64 %s, i32 128 ret <256 x i64> %ret @@ -37,9 +34,7 @@ define fastcc <512 x i64> @insert_ri_v512i64(i64 %s) { ; CHECK-LABEL: insert_ri_v512i64: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd
%v1, %s0 +; CHECK-NEXT: lsv %v1(116), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <512 x i64> undef, i64 %s, i32 372 ret <512 x i64> %ret @@ -60,9 +55,8 @@ define fastcc <256 x i32> @insert_ri7_v256i32(i32 signext %s) { ; CHECK-LABEL: insert_ri7_v256i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v0, %s0 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lsv %v0(127), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x i32> undef, i32 %s, i32 127 ret <256 x i32> %ret @@ -71,9 +65,9 @@ define fastcc <256 x i32> @insert_ri8_v256i32(i32 signext %s) { ; CHECK-LABEL: insert_ri8_v256i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v0, %s0 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea %s1, 128 +; CHECK-NEXT: lsv %v0(%s1), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x i32> undef, i32 %s, i32 128 ret <256 x i32> %ret @@ -82,9 +76,12 @@ define fastcc <512 x i32> @insert_ri_v512i32(i32 signext %s) { ; CHECK-LABEL: insert_ri_v512i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 512 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v0, %s0 +; CHECK-NEXT: lea %s1, 186 +; CHECK-NEXT: lvs %s2, %v0(%s1) +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: sll %s0, %s0, 32 +; CHECK-NEXT: or %s0, %s2, %s0 +; CHECK-NEXT: lsv %v0(%s1), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <512 x i32> undef, i32 %s, i32 372 ret <512 x i32> %ret @@ -122,9 +119,7 @@ define fastcc <256 x double> @insert_ri7_v256f64(double %s) { ; CHECK-LABEL: insert_ri7_v256f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v0, %s0 +; CHECK-NEXT: lsv %v0(127), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x double> undef, double %s, i32 127 ret <256 x double> %ret @@ -133,9 +128,8 @@ define fastcc <256 x double> @insert_ri8_v256f64(double %s) { ; CHECK-LABEL: insert_ri8_v256f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 -; 
CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v0, %s0 +; CHECK-NEXT: lea %s1, 128 +; CHECK-NEXT: lsv %v0(%s1), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x double> undef, double %s, i32 128 ret <256 x double> %ret @@ -144,9 +138,7 @@ define fastcc <512 x double> @insert_ri_v512f64(double %s) { ; CHECK-LABEL: insert_ri_v512f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v1, %s0 +; CHECK-NEXT: lsv %v1(116), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <512 x double> undef, double %s, i32 372 ret <512 x double> %ret @@ -166,9 +158,7 @@ define fastcc <256 x float> @insert_ri7_v256f32(float %s) { ; CHECK-LABEL: insert_ri7_v256f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v0, %s0 +; CHECK-NEXT: lsv %v0(127), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x float> undef, float %s, i32 127 ret <256 x float> %ret @@ -177,9 +167,8 @@ define fastcc <256 x float> @insert_ri8_v256f32(float %s) { ; CHECK-LABEL: insert_ri8_v256f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v0, %s0 +; CHECK-NEXT: lea %s1, 128 +; CHECK-NEXT: lsv %v0(%s1), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x float> undef, float %s, i32 128 ret <256 x float> %ret @@ -188,9 +177,13 @@ define fastcc <512 x float> @insert_ri_v512f32(float %s) { ; CHECK-LABEL: insert_ri_v512f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 512 -; CHECK-NEXT: lvl %s0 -; CHECK-NEXT: vbrd %v0, %s0 +; CHECK-NEXT: sra.l %s0, %s0, 32 +; CHECK-NEXT: lea %s1, 186 +; CHECK-NEXT: lvs %s2, %v0(%s1) +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: sll %s0, %s0, 32 +; CHECK-NEXT: or %s0, %s2, %s0 +; CHECK-NEXT: lsv %v0(%s1), %s0 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <512 x float> undef, float %s, i32 372 ret <512 x float> %ret