Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1715,6 +1715,58 @@
   return Result;
 }
 
+/// Given a bitcasted vector fed into an extract element instruction and then
+/// bitcasted again, eliminate at least one bitcast by changing the vector type
+/// of the extractelement instruction.
+/// Example:
+///   bitcast (extractelement (bitcast <2 x float> %X to <2 x i32>), 1) to float
+///   --->
+///   extractelement <2 x float> %X, i32 1
+///
+/// Returns the new extractelement instruction, or nullptr when the pattern
+/// does not match or no suitable vector type can be formed.
+static Instruction *foldBitCastExtElt(BitCastInst &BitCast, InstCombiner &IC,
+                                      const DataLayout &DL) {
+  // TODO: Create and use a pattern matcher for ExtractElementInst.
+  auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
+  if (!ExtElt || !ExtElt->hasOneUse())
+    return nullptr;
+
+  Value *InnerBitCast = nullptr;
+  if (!match(ExtElt->getOperand(0), m_BitCast(m_Value(InnerBitCast))))
+    return nullptr;
+
+  // The inner bitcast may have been fed by a scalar (e.g. i64 -> <2 x i32>),
+  // in which case there is no source vector to extract from directly.
+  auto *VecType = dyn_cast<VectorType>(InnerBitCast->getType());
+  if (!VecType)
+    return nullptr;
+
+  Type *DestType = BitCast.getType();
+
+  // If the element type of the vector doesn't match the result type,
+  // bitcast it to a vector type that we can extract from.
+  if (VecType->getElementType() != DestType) {
+    // The result type must be usable as a vector element; it could itself be
+    // a vector, which cannot be nested inside another vector type.
+    if (!VectorType::isValidElementType(DestType))
+      return nullptr;
+
+    // Types without a primitive size (e.g. pointers) report a width of 0,
+    // which would make the element count below undefined.
+    unsigned VecWidth = VecType->getPrimitiveSizeInBits();
+    unsigned DestWidth = DestType->getPrimitiveSizeInBits();
+    if (VecWidth == 0 || DestWidth == 0)
+      return nullptr;
+
+    unsigned NumElts = VecWidth / DestWidth;
+    VecType = VectorType::get(DestType, NumElts);
+    InnerBitCast = IC.Builder->CreateBitCast(InnerBitCast, VecType, "bc");
+  }
+
+  return ExtractElementInst::Create(InnerBitCast, ExtElt->getOperand(1));
+}
+
 static Instruction *foldVecTruncToExtElt(Value *VecInput, Type *DestTy,
                                          unsigned ShiftAmt, InstCombiner &IC,
                                          const DataLayout &DL) {
@@ -1886,6 +1938,9 @@
     }
   }
 
+  if (Instruction *I = foldBitCastExtElt(CI, *this, DL))
+    return I;
+
   if (SrcTy->isPointerTy())
     return commonPointerCastTransforms(CI);
   return commonCastTransforms(CI);
Index: llvm/trunk/test/Transforms/InstCombine/bitcast.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/bitcast.ll
+++ llvm/trunk/test/Transforms/InstCombine/bitcast.ll
@@ -64,7 +64,7 @@
 ; CHECK-NEXT: ret float %add
 }
 
-; TODO: Both bitcasts are unnecessary; change the extractelement.
+; Both bitcasts are unnecessary; change the extractelement.
 
 define float @bitcast_extelt1(<2 x float> %A) {
   %bc1 = bitcast <2 x float> %A to <2 x i32>
@@ -73,13 +73,11 @@
   ret float %bc2
 
 ; CHECK-LABEL: @bitcast_extelt1(
-; CHECK-NEXT: %bc1 = bitcast <2 x float> %A to <2 x i32>
-; CHECK-NEXT: %ext = extractelement <2 x i32> %bc1, i32 0
-; CHECK-NEXT: %bc2 = bitcast i32 %ext to float
+; CHECK-NEXT: %bc2 = extractelement <2 x float> %A, i32 0
 ; CHECK-NEXT: ret float %bc2
 }
 
-; TODO: Second bitcast can be folded into the first.
+; Second bitcast can be folded into the first.
 
 define i64 @bitcast_extelt2(<4 x float> %A) {
   %bc1 = bitcast <4 x float> %A to <2 x double>
@@ -88,9 +86,8 @@
   ret i64 %bc2
 
 ; CHECK-LABEL: @bitcast_extelt2(
-; CHECK-NEXT: %bc1 = bitcast <4 x float> %A to <2 x double>
-; CHECK-NEXT: %ext = extractelement <2 x double> %bc1, i32 1
-; CHECK-NEXT: %bc2 = bitcast double %ext to i64
+; CHECK-NEXT: %bc = bitcast <4 x float> %A to <2 x i64>
+; CHECK-NEXT: %bc2 = extractelement <2 x i64> %bc, i32 1
 ; CHECK-NEXT: ret i64 %bc2
 }
 