Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2361,10 +2361,7 @@ break; } - // Support big-endian targets when it becomes useful. bool IsLE = getDataLayout().isLittleEndian(); - if (!IsLE) - break; // Bitcast 'small element' vector to 'large element' scalar/vector. if ((BitWidth % SubBitWidth) == 0) { @@ -2383,8 +2380,9 @@ for (unsigned i = 0; i != SubScale; ++i) { computeKnownBits(N0, Known2, SubDemandedElts.shl(i), Depth + 1); - Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i); - Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i); + unsigned Shifts = IsLE ? i : SubScale - 1 - i; + Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts); + Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts); } } @@ -2406,7 +2404,8 @@ Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { - unsigned Offset = (i % SubScale) * BitWidth; + unsigned Shifts = IsLE ? i : NumElts - 1 - i; + unsigned Offset = (Shifts % SubScale) * BitWidth; Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); // If we don't know any bits, early out. Index: test/CodeGen/SystemZ/dag-combine-03.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/dag-combine-03.ll @@ -0,0 +1,29 @@ +; Test that DAGCombiner gets helped by getKnownBitsForTargetNode() when +; BITCAST nodes are involved on a big-endian target. 
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+define void @fun() {
+entry:
+  br label %lab0
+
+lab0:
+  %phi = phi i64 [ %sel, %lab0 ], [ 0, %entry ]
+  %add = add nuw nsw i64 %phi, 1
+  %cmp = icmp eq i64 %add, undef
+  %ins = insertelement <2 x i1> undef, i1 %cmp, i32 0
+  %xor = xor <2 x i1> %ins, <i1 true, i1 true> ; NOTE(review): constant operand was missing; restored as all-true - confirm
+  %extr = extractelement <2 x i1> %xor, i32 0
+; The EXTRACT_VECTOR_ELT is first done into an i32, which is then ANDed with
+; 1. The AND is not actually necessary since the element contains a CC (i1)
+; value. Test that the BITCAST nodes in the DAG are handled when computing
+; KnownBits so that the AND is removed. If this succeeds, a CHI is emitted
+; instead of a TMLL.
+
+; CHECK-LABEL: # %bb.0:
+; CHECK: chi
+; CHECK-NOT: tmll
+; CHECK: j
+  %sel = select i1 %extr, i64 %add, i64 0
+  br label %lab0
+}