diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1103,7 +1103,7 @@
       return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
     }
 
-    if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) &&
+    if (match(Op0, m_ZExt(m_Value(X))) &&
         (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
       assert(ShAmt < X->getType()->getScalarSizeInBits() &&
              "Big shift not simplified to zero?");
diff --git a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
--- a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
+++ b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
@@ -5,9 +5,9 @@
 
 define i16 @test1(i16 %a) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[C:%.*]] = lshr i16 [[A:%.*]], 8
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i16 [[A:%.*]], 8
 ; CHECK-NEXT:    [[D:%.*]] = mul i16 [[A]], 5
-; CHECK-NEXT:    [[E:%.*]] = or i16 [[C]], [[D]]
+; CHECK-NEXT:    [[E:%.*]] = or i16 [[D]], [[TMP1]]
 ; CHECK-NEXT:    ret i16 [[E]]
 ;
   %b = zext i16 %a to i32 ; [#uses=2]
@@ -20,9 +20,9 @@
 
 define <2 x i16> @test1_vec(<2 x i16> %a) {
 ; CHECK-LABEL: @test1_vec(
-; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i16> [[A:%.*]],
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i16> [[A:%.*]],
 ; CHECK-NEXT:    [[D:%.*]] = mul <2 x i16> [[A]],
-; CHECK-NEXT:    [[E:%.*]] = or <2 x i16> [[C]], [[D]]
+; CHECK-NEXT:    [[E:%.*]] = or <2 x i16> [[D]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i16> [[E]]
 ;
   %b = zext <2 x i16> %a to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/apint-cast.ll b/llvm/test/Transforms/InstCombine/apint-cast.ll
--- a/llvm/test/Transforms/InstCombine/apint-cast.ll
+++ b/llvm/test/Transforms/InstCombine/apint-cast.ll
@@ -7,9 +7,9 @@
 
 define i17 @test1(i17 %a) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[C:%.*]] = lshr i17 [[A:%.*]], 8
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i17 [[A:%.*]], 8
 ; CHECK-NEXT:    [[D:%.*]] = shl i17 [[A]], 8
-; CHECK-NEXT:    [[E:%.*]] = or i17 [[C]], [[D]]
+; CHECK-NEXT:    [[E:%.*]] = or i17 [[D]], [[TMP1]]
 ; CHECK-NEXT:    ret i17 [[E]]
 ;
   %b = zext i17 %a to i37 ; [#uses=2]
@@ -22,9 +22,9 @@
 
 define i167 @test2(i167 %a) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[C:%.*]] = lshr i167 [[A:%.*]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i167 [[A:%.*]], 9
 ; CHECK-NEXT:    [[D:%.*]] = shl i167 [[A]], 8
-; CHECK-NEXT:    [[E:%.*]] = or i167 [[C]], [[D]]
+; CHECK-NEXT:    [[E:%.*]] = or i167 [[D]], [[TMP1]]
 ; CHECK-NEXT:    ret i167 [[E]]
 ;
   %b = zext i167 %a to i577 ; [#uses=2]
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -472,9 +472,9 @@
 
 define i16 @test40(i16 %a) {
 ; ALL-LABEL: @test40(
-; ALL-NEXT:    [[T21:%.*]] = lshr i16 [[A:%.*]], 9
+; ALL-NEXT:    [[TMP1:%.*]] = lshr i16 [[A:%.*]], 9
 ; ALL-NEXT:    [[T5:%.*]] = shl i16 [[A]], 8
-; ALL-NEXT:    [[T32:%.*]] = or i16 [[T21]], [[T5]]
+; ALL-NEXT:    [[T32:%.*]] = or i16 [[T5]], [[TMP1]]
 ; ALL-NEXT:    ret i16 [[T32]]
 ;
   %t = zext i16 %a to i32
@@ -487,9 +487,9 @@
 
 define <2 x i16> @test40vec(<2 x i16> %a) {
 ; ALL-LABEL: @test40vec(
-; ALL-NEXT:    [[T21:%.*]] = lshr <2 x i16> [[A:%.*]],
+; ALL-NEXT:    [[TMP1:%.*]] = lshr <2 x i16> [[A:%.*]],
 ; ALL-NEXT:    [[T5:%.*]] = shl <2 x i16> [[A]],
-; ALL-NEXT:    [[T32:%.*]] = or <2 x i16> [[T21]], [[T5]]
+; ALL-NEXT:    [[T32:%.*]] = or <2 x i16> [[T5]], [[TMP1]]
 ; ALL-NEXT:    ret <2 x i16> [[T32]]
 ;
   %t = zext <2 x i16> %a to <2 x i32>
@@ -2084,8 +2084,8 @@
 ; ALL-LABEL: @trunc_lshr_zext_uses1(
 ; ALL-NEXT:    [[B:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i32>
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[B]])
-; ALL-NEXT:    [[C:%.*]] = lshr <2 x i8> [[A]],
-; ALL-NEXT:    ret <2 x i8> [[C]]
+; ALL-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[A]],
+; ALL-NEXT:    ret <2 x i8> [[TMP1]]
 ;
   %B = zext <2 x i8> %A to <2 x i32>
   call void @use_v2i32(<2 x i32> %B)
diff --git a/llvm/test/Transforms/InstCombine/pr50555.ll b/llvm/test/Transforms/InstCombine/pr50555.ll
--- a/llvm/test/Transforms/InstCombine/pr50555.ll
+++ b/llvm/test/Transforms/InstCombine/pr50555.ll
@@ -3,11 +3,11 @@
 
 define void @trunc_one_add(i16* %a, i8 %b) {
 ; CHECK-LABEL: @trunc_one_add(
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[B:%.*]] to i32
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[ZEXT]], 1
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[SHR]], [[ZEXT]]
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[ADD]] to i16
-; CHECK-NEXT:    store i16 [[TRUNC]], i16* [[A:%.*]], align 2
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[B:%.*]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 [[B]], 1
+; CHECK-NEXT:    [[SHR:%.*]] = zext i8 [[TMP1]] to i16
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i16 [[ZEXT]], [[SHR]]
+; CHECK-NEXT:    store i16 [[ADD]], i16* [[A:%.*]], align 2
 ; CHECK-NEXT:    ret void
 ;
   %zext = zext i8 %b to i32
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr50555.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr50555.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr50555.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr50555.ll
@@ -4,71 +4,38 @@
 
 define void @trunc_through_one_add(i16* noalias %0, i8* noalias readonly %1) {
 ; SSE-LABEL: @trunc_through_one_add(
-; SSE-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP1:%.*]] to <4 x i8>*
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
-; SSE-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
-; SSE-NEXT:    [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]],
-; SSE-NEXT:    [[TMP7:%.*]] = add nuw nsw <4 x i32> [[TMP6]], [[TMP5]]
-; SSE-NEXT:    [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]],
-; SSE-NEXT:    [[TMP9:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16>
-; SSE-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP0:%.*]] to <4 x i16>*
-; SSE-NEXT:    store <4 x i16> [[TMP9]], <4 x i16>* [[TMP10]], align 2
-; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 4
-; SSE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 4
-; SSE-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP11]] to <4 x i8>*
-; SSE-NEXT:    [[TMP14:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
-; SSE-NEXT:    [[TMP15:%.*]] = zext <4 x i8> [[TMP14]] to <4 x i32>
-; SSE-NEXT:    [[TMP16:%.*]] = lshr <4 x i32> [[TMP15]],
-; SSE-NEXT:    [[TMP17:%.*]] = add nuw nsw <4 x i32> [[TMP16]], [[TMP15]]
-; SSE-NEXT:    [[TMP18:%.*]] = lshr <4 x i32> [[TMP17]],
-; SSE-NEXT:    [[TMP19:%.*]] = trunc <4 x i32> [[TMP18]] to <4 x i16>
-; SSE-NEXT:    [[TMP20:%.*]] = bitcast i16* [[TMP12]] to <4 x i16>*
-; SSE-NEXT:    store <4 x i16> [[TMP19]], <4 x i16>* [[TMP20]], align 2
-; SSE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 8
-; SSE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 8
-; SSE-NEXT:    [[TMP23:%.*]] = bitcast i8* [[TMP21]] to <4 x i8>*
-; SSE-NEXT:    [[TMP24:%.*]] = load <4 x i8>, <4 x i8>* [[TMP23]], align 1
-; SSE-NEXT:    [[TMP25:%.*]] = zext <4 x i8> [[TMP24]] to <4 x i32>
-; SSE-NEXT:    [[TMP26:%.*]] = lshr <4 x i32> [[TMP25]],
-; SSE-NEXT:    [[TMP27:%.*]] = add nuw nsw <4 x i32> [[TMP26]], [[TMP25]]
-; SSE-NEXT:    [[TMP28:%.*]] = lshr <4 x i32> [[TMP27]],
-; SSE-NEXT:    [[TMP29:%.*]] = trunc <4 x i32> [[TMP28]] to <4 x i16>
-; SSE-NEXT:    [[TMP30:%.*]] = bitcast i16* [[TMP22]] to <4 x i16>*
-; SSE-NEXT:    store <4 x i16> [[TMP29]], <4 x i16>* [[TMP30]], align 2
-; SSE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 12
-; SSE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 12
-; SSE-NEXT:    [[TMP33:%.*]] = bitcast i8* [[TMP31]] to <4 x i8>*
-; SSE-NEXT:    [[TMP34:%.*]] = load <4 x i8>, <4 x i8>* [[TMP33]], align 1
-; SSE-NEXT:    [[TMP35:%.*]] = zext <4 x i8> [[TMP34]] to <4 x i32>
-; SSE-NEXT:    [[TMP36:%.*]] = lshr <4 x i32> [[TMP35]],
-; SSE-NEXT:    [[TMP37:%.*]] = add nuw nsw <4 x i32> [[TMP36]], [[TMP35]]
-; SSE-NEXT:    [[TMP38:%.*]] = lshr <4 x i32> [[TMP37]],
-; SSE-NEXT:    [[TMP39:%.*]] = trunc <4 x i32> [[TMP38]] to <4 x i16>
-; SSE-NEXT:    [[TMP40:%.*]] = bitcast i16* [[TMP32]] to <4 x i16>*
-; SSE-NEXT:    store <4 x i16> [[TMP39]], <4 x i16>* [[TMP40]], align 2
+; SSE-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP1:%.*]] to <8 x i8>*
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1
+; SSE-NEXT:    [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i16>
+; SSE-NEXT:    [[TMP6:%.*]] = lshr <8 x i8> [[TMP4]],
+; SSE-NEXT:    [[TMP7:%.*]] = zext <8 x i8> [[TMP6]] to <8 x i16>
+; SSE-NEXT:    [[TMP8:%.*]] = add nuw nsw <8 x i16> [[TMP7]], [[TMP5]]
+; SSE-NEXT:    [[TMP9:%.*]] = lshr <8 x i16> [[TMP8]],
+; SSE-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP0:%.*]] to <8 x i16>*
+; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* [[TMP10]], align 2
+; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 8
+; SSE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 8
+; SSE-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP11]] to <8 x i8>*
+; SSE-NEXT:    [[TMP14:%.*]] = load <8 x i8>, <8 x i8>* [[TMP13]], align 1
+; SSE-NEXT:    [[TMP15:%.*]] = zext <8 x i8> [[TMP14]] to <8 x i16>
+; SSE-NEXT:    [[TMP16:%.*]] = lshr <8 x i8> [[TMP14]],
+; SSE-NEXT:    [[TMP17:%.*]] = zext <8 x i8> [[TMP16]] to <8 x i16>
+; SSE-NEXT:    [[TMP18:%.*]] = add nuw nsw <8 x i16> [[TMP17]], [[TMP15]]
+; SSE-NEXT:    [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]],
+; SSE-NEXT:    [[TMP20:%.*]] = bitcast i16* [[TMP12]] to <8 x i16>*
+; SSE-NEXT:    store <8 x i16> [[TMP19]], <8 x i16>* [[TMP20]], align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @trunc_through_one_add(
-; AVX-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP1:%.*]] to <8 x i8>*
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1
-; AVX-NEXT:    [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i32>
-; AVX-NEXT:    [[TMP6:%.*]] = lshr <8 x i32> [[TMP5]],
-; AVX-NEXT:    [[TMP7:%.*]] = add nuw nsw <8 x i32> [[TMP6]], [[TMP5]]
-; AVX-NEXT:    [[TMP8:%.*]] = lshr <8 x i32> [[TMP7]],
-; AVX-NEXT:    [[TMP9:%.*]] = trunc <8 x i32> [[TMP8]] to <8 x i16>
-; AVX-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP0:%.*]] to <8 x i16>*
-; AVX-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* [[TMP10]], align 2
-; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 8
-; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 8
-; AVX-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP11]] to <8 x i8>*
-; AVX-NEXT:    [[TMP14:%.*]] = load <8 x i8>, <8 x i8>* [[TMP13]], align 1
-; AVX-NEXT:    [[TMP15:%.*]] = zext <8 x i8> [[TMP14]] to <8 x i32>
-; AVX-NEXT:    [[TMP16:%.*]] = lshr <8 x i32> [[TMP15]],
-; AVX-NEXT:    [[TMP17:%.*]] = add nuw nsw <8 x i32> [[TMP16]], [[TMP15]]
-; AVX-NEXT:    [[TMP18:%.*]] = lshr <8 x i32> [[TMP17]],
-; AVX-NEXT:    [[TMP19:%.*]] = trunc <8 x i32> [[TMP18]] to <8 x i16>
-; AVX-NEXT:    [[TMP20:%.*]] = bitcast i16* [[TMP12]] to <8 x i16>*
-; AVX-NEXT:    store <8 x i16> [[TMP19]], <8 x i16>* [[TMP20]], align 2
+; AVX-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP1:%.*]] to <16 x i8>*
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1
+; AVX-NEXT:    [[TMP5:%.*]] = zext <16 x i8> [[TMP4]] to <16 x i16>
+; AVX-NEXT:    [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]],
+; AVX-NEXT:    [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i16>
+; AVX-NEXT:    [[TMP8:%.*]] = add nuw nsw <16 x i16> [[TMP7]], [[TMP5]]
+; AVX-NEXT:    [[TMP9:%.*]] = lshr <16 x i16> [[TMP8]],
+; AVX-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP0:%.*]] to <16 x i16>*
+; AVX-NEXT:    store <16 x i16> [[TMP9]], <16 x i16>* [[TMP10]], align 2
 ; AVX-NEXT:    ret void
 ;
   %3 = load i8, i8* %1, align 1