Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -771,9 +771,16 @@ // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + + EVT ScalarStVT = StVT.getScalarType(); EVT NVT = EVT::getIntegerVT(*DAG.getContext(), - StVT.getStoreSizeInBits()); - Value = DAG.getZeroExtendInReg(Value, dl, StVT); + ScalarStVT.getStoreSizeInBits()); + if (StVT.isVector()) { + NVT = EVT::getVectorVT(*DAG.getContext(), NVT, + StVT.getVectorNumElements()); + } + + Value = DAG.getZeroExtendInReg(Value, dl, ScalarStVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, isVolatile, isNonTemporal, Alignment, Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -340,8 +340,14 @@ unsigned SmallVTBits = DemandedSize; if (!isPowerOf2_32(SmallVTBits)) SmallVTBits = NextPowerOf2(SmallVTBits); + + EVT VT = Op.getValueType(); + unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 0; for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits); + if (NElts != 0) + SmallVT = EVT::getVectorVT(*DAG.getContext(), SmallVT, NElts); + if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && TLI.isZExtFree(SmallVT, Op.getValueType())) { // We found a type with free casts. 
Index: test/CodeGen/R600/trunc-store-i1-vector.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/trunc-store-i1-vector.ll
@@ -0,0 +1,9 @@
+; XFAIL: *
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @global_truncstore_v4i32_to_v4i1
+define void @global_truncstore_v4i32_to_v4i1(<4 x i1> addrspace(1)* %out, <4 x i32> %val) nounwind { ; Truncates the <4 x i32> argument to <4 x i1> and stores it through the addrspace(1) pointer %out.
+  %trunc = trunc <4 x i32> %val to <4 x i1> ; narrow every lane to a single bit
+  store <4 x i1> %trunc, <4 x i1> addrspace(1)* %out ; store of a vector whose element type is smaller than a byte; presumably the case the accompanying legalizer change targets (confirm)
+  ret void
+} ; NOTE(review): the whole file is marked as an expected failure and only the function label is checked; there are no instruction checks yet.