Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1501,6 +1501,14 @@ EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) { + SDValue Value, NewChain; + std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); + std::tie(Lo, Hi) = DAG.SplitVector(Value, dl); + ReplaceValueWith(SDValue(LD, 1), NewChain); + return; + } + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); @@ -3664,6 +3672,20 @@ LoadSDNode *LD = cast(N); ISD::LoadExtType ExtType = LD->getExtensionType(); + // A vector must always be stored in memory as-is, i.e. without any padding + // between the elements, since various code depends on it, e.g. in the + // handling of a bitcast of a vector type to int, which may be done with a + // vector store followed by an integer load. A vector that does not have + // elements that are byte-sized must therefore be loaded as an integer, + // from which the individual vector elements are then extracted. 
+ if (!LD->getMemoryVT().isByteSized()) { + SDValue Value, NewChain; + std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); + ReplaceValueWith(SDValue(LD, 0), Value); + ReplaceValueWith(SDValue(LD, 1), NewChain); + return SDValue(); + } + SDValue Result; SmallVector LdChain; // Chain for the series of load if (ExtType != ISD::NON_EXTLOAD) Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6578,12 +6578,48 @@ SDValue Chain = LD->getChain(); SDValue BasePTR = LD->getBasePtr(); EVT SrcVT = LD->getMemoryVT(); + EVT DstVT = LD->getValueType(0); ISD::LoadExtType ExtType = LD->getExtensionType(); unsigned NumElem = SrcVT.getVectorNumElements(); EVT SrcEltVT = SrcVT.getScalarType(); - EVT DstEltVT = LD->getValueType(0).getScalarType(); + EVT DstEltVT = DstVT.getScalarType(); + + // A vector must always be stored in memory as-is, i.e. without any padding + // between the elements, since various code depends on it, e.g. in the + // handling of a bitcast of a vector type to int, which may be done with a + // vector store followed by an integer load. A vector that does not have + // elements that are byte-sized must therefore be loaded as an integer, + // from which the individual vector elements are then extracted. + if (!SrcEltVT.isByteSized()) { + unsigned NumBits = SrcVT.getSizeInBits(); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + + SDValue Load = DAG.getLoad(IntVT, SL, Chain, BasePTR, LD->getPointerInfo(), + LD->getAlignment(), + LD->getMemOperand()->getFlags(), + LD->getAAInfo()); + + SmallVector Vals; + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + unsigned ShiftIntoIdx = + (DAG.getDataLayout().isBigEndian() ? 
(NumElem - 1) - Idx : Idx); + SDValue ShiftAmount = + DAG.getConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(), SL, IntVT); + SDValue ShiftedElt = + DAG.getNode(ISD::SRL, SL, IntVT, Load, ShiftAmount); + SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, ShiftedElt); + if (ExtType != ISD::NON_EXTLOAD) { + unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType); + Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar); + } + Vals.push_back(Scalar); + } + + SDValue Value = DAG.getBuildVector(DstVT, SL, Vals); + return std::make_pair(Value, Load.getValue(1)); + } unsigned Stride = SrcEltVT.getSizeInBits() / 8; assert(SrcEltVT.isByteSized()); @@ -6605,7 +6641,7 @@ } SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); - SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals); + SDValue Value = DAG.getBuildVector(DstVT, SL, Vals); return std::make_pair(Value, NewChain); } Index: llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll =================================================================== --- llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll +++ llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll @@ -121,18 +121,20 @@ define void @fun3(<3 x i31>* %src, <3 x i31>* %p) ; CHECK-LABEL: fun3: ; CHECK: # %bb.0: -; CHECK-NEXT: llgf %r0, 3(%r2) -; CHECK-NEXT: llgf %r1, 6(%r2) -; CHECK-NEXT: llgf %r2, 0(%r2) -; CHECK-NEXT: rosbg %r1, %r0, 0, 32, 31 -; CHECK-NEXT: sllg %r4, %r2, 62 -; CHECK-NEXT: rosbg %r4, %r0, 0, 32, 31 -; CHECK-NEXT: srlg %r0, %r4, 32 -; CHECK-NEXT: st %r1, 8(%r3) -; CHECK-NEXT: sllg %r1, %r2, 30 -; CHECK-NEXT: lr %r1, %r0 -; CHECK-NEXT: nihh %r1, 8191 -; CHECK-NEXT: stg %r1, 0(%r3) +; CHECK-NEXT: l %r0, 8(%r2) +; CHECK-NEXT: lg %r1, 0(%r2) +; CHECK-NEXT: sllg %r2, %r1, 32 +; CHECK-NEXT: lr %r2, %r0 +; CHECK-NEXT: srlg %r0, %r2, 62 +; CHECK-NEXT: st %r2, 8(%r3) +; CHECK-NEXT: rosbg %r0, %r1, 33, 61, 34 +; CHECK-NEXT: sllg %r1, %r0, 62 +; CHECK-NEXT: rosbg %r1, %r2, 2, 32, 0 +; CHECK-NEXT: srlg %r1, %r1, 
32 +; CHECK-NEXT: sllg %r0, %r0, 30 +; CHECK-NEXT: lr %r0, %r1 +; CHECK-NEXT: nihh %r0, 8191 +; CHECK-NEXT: stg %r0, 0(%r3) ; CHECK-NEXT: br %r14 { %tmp = load <3 x i31>, <3 x i31>* %src Index: llvm/test/CodeGen/X86/load-local-v3i1.ll =================================================================== --- llvm/test/CodeGen/X86/load-local-v3i1.ll +++ llvm/test/CodeGen/X86/load-local-v3i1.ll @@ -96,27 +96,21 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movq %rdi, %r14 -; CHECK-NEXT: movzbl (%rdx), %ebp -; CHECK-NEXT: movl %ebp, %eax -; CHECK-NEXT: shrl %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: movl %ebp, %ecx -; CHECK-NEXT: andl $1, %ecx -; CHECK-NEXT: movd %ecx, %xmm0 -; CHECK-NEXT: pinsrd $1, %eax, %xmm0 -; CHECK-NEXT: shrl $2, %ebp -; CHECK-NEXT: andl $1, %ebp -; CHECK-NEXT: pinsrd $2, %ebp, %xmm0 -; CHECK-NEXT: movd %xmm0, %ebx -; CHECK-NEXT: pextrd $1, %xmm0, %r15d +; CHECK-NEXT: movb (%rdx), %al +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrb $2, %cl +; CHECK-NEXT: movzbl %al, %r15d +; CHECK-NEXT: shrb %al +; CHECK-NEXT: movzbl %al, %ebx +; CHECK-NEXT: movzbl %cl, %ebp ; CHECK-NEXT: movq %rsi, %rdi -; CHECK-NEXT: movl %ebx, %esi -; CHECK-NEXT: movl %r15d, %edx +; CHECK-NEXT: movl %r15d, %esi +; CHECK-NEXT: movl %ebx, %edx ; CHECK-NEXT: movl %ebp, %ecx ; CHECK-NEXT: callq masked_load_v3 ; CHECK-NEXT: movq %r14, %rdi -; CHECK-NEXT: movl %ebx, %esi -; CHECK-NEXT: movl %r15d, %edx +; CHECK-NEXT: movl %r15d, %esi +; CHECK-NEXT: movl %ebx, %edx ; CHECK-NEXT: movl %ebp, %ecx ; CHECK-NEXT: callq masked_store4_v3 ; CHECK-NEXT: addq $8, %rsp