Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6028,13 +6028,14 @@ unsigned BuildVecNumElts = BuildVect.getNumOperands(); unsigned TruncVecNumElts = VT.getVectorNumElements(); unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; + unsigned TruncEltBaseOffset = isLE ? 0 : (TruncEltOffset-1); assert((BuildVecNumElts % TruncVecNumElts) == 0 && "Invalid number of elements"); SmallVector Opnds; for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) - Opnds.push_back(BuildVect.getOperand(i)); + Opnds.push_back(BuildVect.getOperand(i+TruncEltBaseOffset)); return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } Index: test/CodeGen/ARM/big-endian-neon-trunc.ll =================================================================== --- test/CodeGen/ARM/big-endian-neon-trunc.ll +++ test/CodeGen/ARM/big-endian-neon-trunc.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple armeb-eabi -mattr v7,neon -o - | FileCheck %s + +; Check the folding of buildvector, bitcast, and truncate. +; For example, fold +; (v2i32 trunc (bitcast ((v4i32) buildvector a, b, c, d ) v2i64)) to +; (v2i32 (buildvector b, d )) + +define void @buildvec_bitcast_trunc_2i64_to_2i32( <2 x i64>* %loadaddr, <2 x i32>* %storeaddr ) { +; CHECK-LABEL: buildvec_bitcast_trunc_2i64_to_2i32: +; CHECK: mul [[REG1:r[0-9]+]], +; CHECK: mul [[REG2:r[0-9]+]], +; CHECK: vmov.32 [[REG3:d[0-9]+]][0], [[REG1]] +; CHECK: vmov.32 [[REG3]][1], [[REG2]] +; CHECK: vstr [[REG3]] + %1 = load <2 x i64>* %loadaddr + %2 = mul <2 x i64> %1, %1 + %3 = trunc <2 x i64> %2 to <2 x i32> + store <2 x i32> %3, <2 x i32>* %storeaddr + ret void +} + +define void @buildvec_bitcast_trunc_2i64_to_2i16( <2 x i64>* %loadaddr, <2 x i16>* %storeaddr ) { +; CHECK-LABEL: buildvec_bitcast_trunc_2i64_to_2i16: +; CHECK: mul [[REG1:r[0-9]+]], +; CHECK: mul [[REG2:r[0-9]+]], +; CHECK: 
vmov.32 [[REG3:d[0-9]+]][0], [[REG1]] +; CHECK: vmov.32 [[REG3]][1], [[REG2]] +; CHECK: vuzp.16 [[REG3]] + %1 = load <2 x i64>* %loadaddr + %2 = mul <2 x i64> %1, %1 + %3 = trunc <2 x i64> %2 to <2 x i16> + store <2 x i16> %3, <2 x i16>* %storeaddr + ret void +} + +define void @buildvec_bitcast_trunc_2i64_to_2i8( <2 x i64>* %loadaddr, <2 x i8>* %storeaddr ) { +; CHECK-LABEL: buildvec_bitcast_trunc_2i64_to_2i8: +; CHECK: mul [[REG1:r[0-9]+]], +; CHECK: mul [[REG2:r[0-9]+]], +; CHECK: vmov.32 [[REG3:d[0-9]+]][0], [[REG1]] +; CHECK: vmov.32 [[REG3]][1], [[REG2]] +; CHECK: vmov.32 [[REG4:r[0-9]+]], [[REG3]][1] +; CHECK: strb [[REG4]] +; CHECK: vmov.32 [[REG5:r[0-9]+]], [[REG3]][0] +; CHECK: strb [[REG5]] + %1 = load <2 x i64>* %loadaddr + %2 = mul <2 x i64> %1, %1 + %3 = trunc <2 x i64> %2 to <2 x i8> + store <2 x i8> %3, <2 x i8>* %storeaddr + ret void +} +