diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1182,6 +1182,47 @@ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); break; } + + // Handle cast from vector integer type to scalar or vector integer. + auto *SrcVecTy = dyn_cast(SrcTy); + if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() || + !I->getType()->isIntOrIntVectorTy()) + break; + + // Look through a cast from narrow vector elements to wider type. + // Examples: v4i32 -> v2i64, v3i8 -> i24 + unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits(); + if (BitWidth % SubBitWidth == 0) { + // Known bits are automatically intersected across demanded elements of a + // vector. So for example, if a bit is computed as known zero, it must be + // zero across all demanded elements of the vector. + // + // For this bitcast, each demanded element of the output is sub-divided + // across a set of smaller vector elements in the source vector. To get + // the known bits for an entire element of the output, compute the known + // bits for each sub-element sequentially. This is done by shifting the + // one-set-bit demanded elements parameter across the sub-elements for + // consecutive calls to computeKnownBits. We are using the demanded + // elements parameter as a mask operator. + // + // The known bits of each sub-element are then inserted into place + // (dependent on endianness) to form the full result of known bits. 
+ unsigned NumElts = DemandedElts.getBitWidth(); + unsigned SubScale = BitWidth / SubBitWidth; + APInt SubDemandedElts = APInt::getNullValue(NumElts * SubScale); + for (unsigned i = 0; i != NumElts; ++i) { + if (DemandedElts[i]) + SubDemandedElts.setBit(i * SubScale); + } + + KnownBits KnownSrc(SubBitWidth); + for (unsigned i = 0; i != SubScale; ++i) { + computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, + Depth + 1, Q); + unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i; + Known.insertBits(KnownSrc, ShiftElt * SubBitWidth); + } + } break; } case Instruction::SExt: { diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll @@ -2762,14 +2762,18 @@ ret <2 x i64> %2 } +; The shift amount is in range (masked with 31 and high 32-bits are zero), +; so convert to standard IR - https://llvm.org/PR50123 + define <2 x i64> @sse2_psll_q_128_masked_bitcast(<2 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast( ; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> ; CHECK-NEXT: [[M:%.*]] = and <4 x i32> [[B]], ; CHECK-NEXT: [[I:%.*]] = insertelement <4 x i32> [[M]], i32 0, i32 1 ; CHECK-NEXT: [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64> -; CHECK-NEXT: [[R:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[SHAMT]]) -; CHECK-NEXT: ret <2 x i64> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[SHAMT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP1]] +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %b = bitcast <2 x i64> %a to <4 x i32> %m = and <4 x i32> %b, @@ -2779,6 +2783,8 @@ ret <2 x i64> %r } +; TODO: This could be recognized as an over-shift. 
+ define <2 x i64> @sse2_psll_q_128_masked_bitcast_overshift(<2 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast_overshift( ; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> diff --git a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll --- a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll +++ b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instsimplify -S -data-layout="E" | FileCheck %s +; RUN: opt < %s -instsimplify -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BIGENDIAN +; RUN: opt < %s -instsimplify -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LITTLEENDIAN ; If any bits of the shift amount are known to make it exceed or equal ; the number of bits in the type, the shift causes undefined behavior. @@ -223,12 +224,11 @@ ret i8 %ex } +; The shift amount is 0 on either of high/low bytes. The middle byte doesn't matter. + define i24 @bitcast_noshift_scalar(<3 x i8> %v1, i24 %v2) { ; CHECK-LABEL: @bitcast_noshift_scalar( -; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> , <3 x i32> -; CHECK-NEXT: [[B:%.*]] = bitcast <3 x i8> [[S]] to i24 -; CHECK-NEXT: [[R:%.*]] = shl i24 [[V2:%.*]], [[B]] -; CHECK-NEXT: ret i24 [[R]] +; CHECK-NEXT: ret i24 [[V2:%.*]] ; %c = insertelement <3 x i8> poison, i8 0, i64 0 %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> @@ -237,12 +237,17 @@ ret i24 %r } +; The shift amount is 0 on low byte of big-endian and unknown on little-endian. 
+ define i24 @bitcast_noshift_scalar_bigend(<3 x i8> %v1, i24 %v2) { -; CHECK-LABEL: @bitcast_noshift_scalar_bigend( -; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> , <3 x i32> -; CHECK-NEXT: [[B:%.*]] = bitcast <3 x i8> [[S]] to i24 -; CHECK-NEXT: [[R:%.*]] = shl i24 [[V2:%.*]], [[B]] -; CHECK-NEXT: ret i24 [[R]] +; BIGENDIAN-LABEL: @bitcast_noshift_scalar_bigend( +; BIGENDIAN-NEXT: ret i24 [[V2:%.*]] +; +; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_bigend( +; LITTLEENDIAN-NEXT: [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> , <3 x i32> +; LITTLEENDIAN-NEXT: [[B:%.*]] = bitcast <3 x i8> [[S]] to i24 +; LITTLEENDIAN-NEXT: [[R:%.*]] = shl i24 [[V2:%.*]], [[B]] +; LITTLEENDIAN-NEXT: ret i24 [[R]] ; %c = insertelement <3 x i8> poison, i8 0, i64 0 %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> @@ -251,12 +256,17 @@ ret i24 %r } +; The shift amount is 0 on low byte of little-endian and unknown on big-endian. + define i24 @bitcast_noshift_scalar_littleend(<3 x i8> %v1, i24 %v2) { -; CHECK-LABEL: @bitcast_noshift_scalar_littleend( -; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> , <3 x i32> -; CHECK-NEXT: [[B:%.*]] = bitcast <3 x i8> [[S]] to i24 -; CHECK-NEXT: [[R:%.*]] = shl i24 [[V2:%.*]], [[B]] -; CHECK-NEXT: ret i24 [[R]] +; BIGENDIAN-LABEL: @bitcast_noshift_scalar_littleend( +; BIGENDIAN-NEXT: [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> , <3 x i32> +; BIGENDIAN-NEXT: [[B:%.*]] = bitcast <3 x i8> [[S]] to i24 +; BIGENDIAN-NEXT: [[R:%.*]] = shl i24 [[V2:%.*]], [[B]] +; BIGENDIAN-NEXT: ret i24 [[R]] +; +; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_littleend( +; LITTLEENDIAN-NEXT: ret i24 [[V2:%.*]] ; %c = insertelement <3 x i8> poison, i8 0, i64 0 %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> @@ -265,12 +275,12 @@ ret i24 %r } +; The shift amount is known 24 on little-endian and known 24<<16 on big-endian +; across all vector elements, so it's an overshift either way. 
+ define <3 x i24> @bitcast_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) { ; CHECK-LABEL: @bitcast_overshift_vector( -; CHECK-NEXT: [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> , <9 x i32> -; CHECK-NEXT: [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24> -; CHECK-NEXT: [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]] -; CHECK-NEXT: ret <3 x i24> [[R]] +; CHECK-NEXT: ret <3 x i24> poison ; %c = insertelement <9 x i8> poison, i8 24, i64 0 %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> @@ -279,12 +289,18 @@ ret <3 x i24> %r } +; The shift amount is known 23 on little-endian and known 23<<16 on big-endian +; across all vector elements, so it's an overshift for big-endian. + define <3 x i24> @bitcast_overshift_vector_bigend(<9 x i8> %v1, <3 x i24> %v2) { -; CHECK-LABEL: @bitcast_overshift_vector_bigend( -; CHECK-NEXT: [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> , <9 x i32> -; CHECK-NEXT: [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24> -; CHECK-NEXT: [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]] -; CHECK-NEXT: ret <3 x i24> [[R]] +; BIGENDIAN-LABEL: @bitcast_overshift_vector_bigend( +; BIGENDIAN-NEXT: ret <3 x i24> poison +; +; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_bigend( +; LITTLEENDIAN-NEXT: [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> , <9 x i32> +; LITTLEENDIAN-NEXT: [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24> +; LITTLEENDIAN-NEXT: [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]] +; LITTLEENDIAN-NEXT: ret <3 x i24> [[R]] ; %c = insertelement <9 x i8> poison, i8 23, i64 0 %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> @@ -293,12 +309,18 @@ ret <3 x i24> %r } +; The shift amount is known 23 on big-endian and known 23<<16 on little-endian +; across all vector elements, so it's an overshift for little-endian. 
+ define <3 x i24> @bitcast_overshift_vector_littleend(<9 x i8> %v1, <3 x i24> %v2) { -; CHECK-LABEL: @bitcast_overshift_vector_littleend( -; CHECK-NEXT: [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> , <9 x i32> -; CHECK-NEXT: [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24> -; CHECK-NEXT: [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]] -; CHECK-NEXT: ret <3 x i24> [[R]] +; BIGENDIAN-LABEL: @bitcast_overshift_vector_littleend( +; BIGENDIAN-NEXT: [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> , <9 x i32> +; BIGENDIAN-NEXT: [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24> +; BIGENDIAN-NEXT: [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]] +; BIGENDIAN-NEXT: ret <3 x i24> [[R]] +; +; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_littleend( +; LITTLEENDIAN-NEXT: ret <3 x i24> poison ; %c = insertelement <9 x i8> poison, i8 23, i64 0 %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> @@ -307,6 +329,8 @@ ret <3 x i24> %r } +; Negative test - the shift amount is known 24 or 24<<16 on only 2 out of 3 elements. + define <3 x i24> @bitcast_partial_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) { ; CHECK-LABEL: @bitcast_partial_overshift_vector( ; CHECK-NEXT: [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> , <9 x i32> @@ -321,6 +345,8 @@ ret <3 x i24> %r } +; Negative test - don't know how to look through a cast with non-integer type (but we could handle this...). + define <1 x i64> @bitcast_noshift_vector_wrong_type(<2 x float> %v1, <1 x i64> %v2) { ; CHECK-LABEL: @bitcast_noshift_vector_wrong_type( ; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> , <2 x i32>