Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15013,6 +15013,11 @@ unsigned NumElts = VT.getVectorNumElements(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits(); + unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits(); + + if (ExtDstSizeInBits % ExtSrcSizeInBits != 0) + return SDValue(); + unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits; // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1> // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1> @@ -15034,11 +15039,10 @@ if (EltSizeInBits != ExtSrcSizeInBits) return SDValue(); - // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for - // power-of-2 truncations as they are the most likely. - for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) - if (isTruncate(Scale)) - return DAG.getBitcast(VT, N00); + // We can remove *extend_vector_inreg only if the truncation happens at + // the same scale as the extension. + if (isTruncate(ExtScale)) + return DAG.getBitcast(VT, N00); return SDValue(); } Index: llvm/trunk/test/CodeGen/X86/vector-truncate-combine.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-truncate-combine.ll +++ llvm/trunk/test/CodeGen/X86/vector-truncate-combine.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=x86_64-- -O2 -start-after=stack-protector -stop-before=loops %s -o - | FileCheck %s + +; This test verifies the fix for PR33368. +; +; The expected outcome of the operation is to store bytes 0 and 2 of the incoming +; parameter into c2 (a 2 x i8 vector). DAGCombine converts shuffles into a +; sequence of extend and subsequent truncate operations. The bug was that an extension +; by 4 followed by a truncation by 8 was completely eliminated. 
+ +; The test checks for the correct sequence of operations that results from the +; preservation of the extend/truncate operations mentioned above (2 extend and +; 3 truncate instructions). +; +; NOTE: This operation could be collapsed into a single truncate. Once that is done +; this test will have to be adjusted. + +; CHECK: PUNPCKLBWrr +; CHECK: PUNPCKLWDrr +; CHECK: PACKUSWBrr +; CHECK: PACKUSWBrr +; CHECK: PACKUSWBrr + +define void @test(double %vec.coerce) local_unnamed_addr { +entry: + %c2 = alloca <2 x i8>, align 2 + %0 = bitcast double %vec.coerce to <8 x i8> + %1 = shufflevector <8 x i8> %0, <8 x i8> undef, <4 x i32> + %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <2 x i32> + store volatile <2 x i8> %2, <2 x i8>* %c2, align 2 + br label %if.end + +if.end: + %3 = bitcast <2 x i8> %2 to i16 + ret void +}