diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1390,12 +1390,17 @@
 }
 
 SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+  assert((Node->getOpcode() == ISD::BUILD_VECTOR ||
+          Node->getOpcode() == ISD::CONCAT_VECTORS) &&
+         "Unexpected opcode!");
+
   // We can't handle this case efficiently.  Allocate a sufficiently
-  // aligned object on the stack, store each element into it, then load
+  // aligned object on the stack, store each operand into it, then load
   // the result as a vector.
   // Create the stack frame object.
   EVT VT = Node->getValueType(0);
-  EVT EltVT = VT.getVectorElementType();
+  EVT MemVT = isa<BuildVectorSDNode>(Node) ? VT.getVectorElementType()
+                                           : Node->getOperand(0).getValueType();
   SDLoc dl(Node);
   SDValue FIPtr = DAG.CreateStackTemporary(VT);
   int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
@@ -1404,7 +1409,7 @@
 
   // Emit a store of each element to the stack slot.
   SmallVector<SDValue, 8> Stores;
-  unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+  unsigned TypeByteSize = MemVT.getSizeInBits() / 8;
   assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
   // Store (in the right endianness) the elements to memory.
   for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
@@ -1417,11 +1422,11 @@
     // If the destination vector element type is narrower than the source
     // element type, only store the bits necessary.
-    if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+    if (MemVT.bitsLT(Node->getOperand(i).getValueType()))
       Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
                                          Node->getOperand(i), Idx,
-                                         PtrInfo.getWithOffset(Offset), EltVT));
-    } else
+                                         PtrInfo.getWithOffset(Offset), MemVT));
+    else
       Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
                                     Node->getOperand(i), Idx,
                                     PtrInfo.getWithOffset(Offset)));
   }
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -0,0 +1,38 @@
+; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; NOTE: Currently all CONCAT_VECTORS get expanded so there's little point in
+; validating all combinations of vector type.
+
+define void @concat_vectors_v4i64(<2 x i64> %a, <2 x i64> %b, <4 x i64> *%c.addr) #0 {
+; CHECK-LABEL: concat_vectors_v4i64:
+; CHECK: stp q0, q1, [sp]
+; CHECK: ptrue [[OUT_PG:p[0-9]+]].d, vl4
+; CHECK: mov x[[LO_ADDR:[0-9]+]], sp
+; CHECK: ld1d { z{{[0-9]+}}.d }, [[OUT_PG]]/z, [x[[LO_ADDR]]]
+  %concat = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  store <4 x i64> %concat, <4 x i64>* %c.addr
+  ret void
+}
+
+define void @concat_vectors_v8i64(<4 x i64> *%a.addr, <4 x i64> *%b.addr, <8 x i64> *%c.addr) #0 {
+; VBITS_GE_512-LABEL: concat_vectors_v8i64:
+; VBITS_GE_512: ptrue [[IN_PG:p[0-9]+]].d, vl4
+; VBITS_GE_512: ld1d { [[LO:z[0-9]+]].d }, [[IN_PG]]/z, [x0]
+; VBITS_GE_512: ld1d { [[HI:z[0-9]+]].d }, [[IN_PG]]/z, [x1]
+; VBITS_GE_512: mov x[[LO_ADDR:[0-9]+]], sp
+; VBITS_GE_512: orr x[[HI_ADDR:[0-9]+]], x[[LO_ADDR]], #0x20
+; VBITS_GE_512: st1d { [[LO]].d }, [[IN_PG]], [x[[LO_ADDR]]]
+; VBITS_GE_512: st1d { [[HI]].d }, [[IN_PG]], [x[[HI_ADDR]]]
+; VBITS_GE_512: ptrue [[OUT_PG:p[0-9]+]].d, vl8
+; VBITS_GE_512: ld1d { z{{[0-9]+}}.d }, [[OUT_PG]]/z, [x8]
+  %a = load <4 x i64>, <4 x i64>* %a.addr
+  %b = load <4 x i64>, <4 x i64>* %b.addr
+  %concat = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <8 x i64> %concat, <8 x i64>* %c.addr
+  ret void
+}
+
+attributes #0 = { nounwind "target-features"="+sve" }
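
NOTE (not part of the patch): the key functional change is that TypeByteSize is now derived from MemVT, i.e. the whole operand type when expanding CONCAT_VECTORS, rather than the scalar element type, so each operand's stack offset advances by a full subvector instead of a single element. Below is a minimal standalone C++ sketch of that offset arithmetic, assuming the concat_vectors_v4i64 case from the test (two <2 x i64> operands concatenated into one <4 x i64>); the names and constants are illustrative only.

#include <cstdio>

int main() {
  const unsigned NumOperands = 2;   // two <2 x i64> inputs to CONCAT_VECTORS
  const unsigned EltBytes = 8;      // i64 element size (old EltVT-based stride)
  const unsigned OperandBytes = 16; // <2 x i64> operand size (new MemVT-based stride)

  for (unsigned i = 0; i != NumOperands; ++i) {
    // Old stride: the second 16-byte operand would be stored at offset 8,
    // overlapping the first. New stride: operands land back to back.
    unsigned OldOffset = EltBytes * i;      // 0, 8
    unsigned NewOffset = OperandBytes * i;  // 0, 16
    std::printf("operand %u: old offset %u, new offset %u\n", i, OldOffset,
                NewOffset);
  }
  return 0;
}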