Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -430,6 +430,50 @@
   return false;
 }
 
+/// Given a vector that is bitcast to an integer, optionally logically
+/// right-shifted, and truncated, convert it to an extractelement.
+/// Example (little endian; the index is mirrored for big endian):
+///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
+///   --->
+///   extractelement <4 x i32> %X, 1
+static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC,
+                                         const DataLayout &DL) {
+  Value *TruncOp = Trunc.getOperand(0);
+  Type *DestType = Trunc.getType();
+  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
+    return nullptr;
+
+  Value *VecInput = nullptr;
+  ConstantInt *ShiftVal = nullptr;
+  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
+                                  m_LShr(m_BitCast(m_Value(VecInput)),
+                                         m_ConstantInt(ShiftVal)))) ||
+      !isa<VectorType>(VecInput->getType()))
+    return nullptr;
+
+  VectorType *VecType = cast<VectorType>(VecInput->getType());
+  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
+  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
+  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
+
+  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
+    return nullptr;
+
+  // If the element type of the vector doesn't match the result type,
+  // bitcast it to a vector type that we can extract from.
+  unsigned NumVecElts = VecWidth / DestWidth;
+  if (VecType->getElementType() != DestType) {
+    VecType = VectorType::get(DestType, NumVecElts);
+    VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc");
+  }
+
+  unsigned Elt = ShiftAmount / DestWidth;
+  if (DL.isBigEndian())
+    Elt = NumVecElts - 1 - Elt;
+
+  return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+}
+
 Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *Result = commonCastTransforms(CI))
     return Result;
@@ -528,6 +572,9 @@
                                      ConstantExpr::getTrunc(Cst, DestTy));
   }
 
+  if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
+    return I;
+
   return nullptr;
 }
 
@@ -1740,56 +1787,6 @@
   return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
 }
 
-static Instruction *foldVecTruncToExtElt(Value *VecInput, Type *DestTy,
-                                         unsigned ShiftAmt, InstCombiner &IC,
-                                         const DataLayout &DL) {
-  VectorType *VecTy = cast<VectorType>(VecInput->getType());
-  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
-  unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
-
-  if ((VecWidth % DestWidth != 0) || (ShiftAmt % DestWidth != 0))
-    return nullptr;
-
-  // If the element type of the vector doesn't match the result type,
-  // bitcast it to be a vector type we can extract from.
-  unsigned NumVecElts = VecWidth / DestWidth;
-  if (VecTy->getElementType() != DestTy) {
-    VecTy = VectorType::get(DestTy, NumVecElts);
-    VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
-  }
-
-  unsigned Elt = ShiftAmt / DestWidth;
-  if (DL.isBigEndian())
-    Elt = NumVecElts - 1 - Elt;
-
-  return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
-}
-
-/// See if we can optimize an integer->float/double bitcast.
-/// The various long double bitcasts can't get in here.
-static Instruction *optimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
-                                              const DataLayout &DL) {
-  Value *Src = CI.getOperand(0);
-  Type *DstTy = CI.getType();
-
-  // If this is a bitcast from int to float, check to see if the int is an
-  // extraction from a vector.
-  Value *VecInput = nullptr;
-  // bitcast(trunc(bitcast(somevector)))
-  if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
-      isa<VectorType>(VecInput->getType()))
-    return foldVecTruncToExtElt(VecInput, DstTy, 0, IC, DL);
-
-  // bitcast(trunc(lshr(bitcast(somevector), cst))
-  ConstantInt *ShAmt = nullptr;
-  if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
-                                m_ConstantInt(ShAmt)))) &&
-      isa<VectorType>(VecInput->getType()))
-    return foldVecTruncToExtElt(VecInput, DstTy, ShAmt->getZExtValue(), IC, DL);
-
-  return nullptr;
-}
-
 Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   // If the operands are integer typed then apply the integer transforms,
   // otherwise just apply the common ones.
@@ -1833,11 +1830,6 @@
     }
   }
 
-  // Try to optimize int -> float bitcasts.
-  if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
-    if (Instruction *I = optimizeIntToFloatBitCast(CI, *this, DL))
-      return I;
-
   if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
     if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
       Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
Index: test/Transforms/InstCombine/trunc.ll
===================================================================
--- test/Transforms/InstCombine/trunc.ll
+++ test/Transforms/InstCombine/trunc.ll
@@ -121,7 +121,7 @@
 
 ; PR25543
 ; https://llvm.org/bugs/show_bug.cgi?id=25543
-; TODO: This could be extractelement.
+; This is an extractelement.
 
 define i32 @trunc_bitcast1(<4 x i32> %v) {
   %bc = bitcast <4 x i32> %v to i128
@@ -130,13 +130,11 @@
   ret i32 %ext
 
 ; CHECK-LABEL: @trunc_bitcast1(
-; CHECK-NEXT:  %bc = bitcast <4 x i32> %v to i128
-; CHECK-NEXT:  %shr = lshr i128 %bc, 32
-; CHECK-NEXT:  %ext = trunc i128 %shr to i32
+; CHECK-NEXT:  %ext = extractelement <4 x i32> %v, i32 1
 ; CHECK-NEXT:  ret i32 %ext
 }
 
-; TODO: This could be bitcast + extractelement.
+; A bitcast may still be required.
 
 define i32 @trunc_bitcast2(<2 x i64> %v) {
   %bc = bitcast <2 x i64> %v to i128
@@ -145,13 +143,12 @@
   ret i32 %ext
 
 ; CHECK-LABEL: @trunc_bitcast2(
-; CHECK-NEXT:  %bc = bitcast <2 x i64> %v to i128
-; CHECK-NEXT:  %shr = lshr i128 %bc, 64
-; CHECK-NEXT:  %ext = trunc i128 %shr to i32
+; CHECK-NEXT:  %bc1 = bitcast <2 x i64> %v to <4 x i32>
+; CHECK-NEXT:  %ext = extractelement <4 x i32> %bc1, i32 2
 ; CHECK-NEXT:  ret i32 %ext
 }
 
-; TODO: The shift is optional. This could be extractelement.
+; The right shift is optional.
 
 define i32 @trunc_bitcast3(<4 x i32> %v) {
   %bc = bitcast <4 x i32> %v to i128
@@ -159,8 +156,7 @@
   ret i32 %ext
 
 ; CHECK-LABEL: @trunc_bitcast3(
-; CHECK-NEXT:  %bc = bitcast <4 x i32> %v to i128
-; CHECK-NEXT:  %ext = trunc i128 %bc to i32
+; CHECK-NEXT:  %ext = extractelement <4 x i32> %v, i32 0
 ; CHECK-NEXT:  ret i32 %ext
 }
 