diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -843,6 +843,49 @@
   if (Instruction *I = foldVecTruncToExtElt(CI, *this))
     return I;
 
+  /// Whenever an element is extracted from a vector, and then truncated,
+  /// canonicalize by converting it to a bitcast followed by an
+  /// extractelement.
+  ///
+  /// Example (little endian):
+  ///   trunc (extractelement <4 x i64> %X, 0) to i32
+  ///   --->
+  ///   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
+  ///
+  /// The fold is skipped when the extractelement has other users (the
+  /// bitcast would be an extra instruction, not a replacement) and when the
+  /// source element width is not a whole multiple of the destination width
+  /// (the bitcast would then change the total vector size).
+  if (auto *ExtElt = dyn_cast<ExtractElementInst>(CI.getOperand(0))) {
+    Value *VecOp = ExtElt->getVectorOperand();
+    Type *VecOpTy = VecOp->getType();
+    unsigned DestScalarSize = DestTy->getScalarSizeInBits();
+    unsigned VecOpScalarSize = VecOpTy->getScalarSizeInBits();
+    uint64_t VecNumElts = VecOpTy->getVectorNumElements();
+
+    // An out-of-range constant index is left alone; ult() also keeps the
+    // getZExtValue() call below from asserting on a very wide index.
+    ConstantInt *Cst = nullptr;
+    if (ExtElt->hasOneUse() && VecOpScalarSize % DestScalarSize == 0 &&
+        match(ExtElt->getIndexOperand(), m_ConstantInt(Cst)) &&
+        Cst->getValue().ult(VecNumElts)) {
+      // Index math is done in 64 bits so the products cannot wrap.
+      uint64_t TruncRatio = VecOpScalarSize / DestScalarSize;
+      uint64_t BitCastNumElts = VecNumElts * TruncRatio;
+      uint64_t VecOpIdx = Cst->getZExtValue();
+      // trunc keeps the low bits of the element: that is the first narrow
+      // lane of the element on little-endian targets and the last one on
+      // big-endian targets.
+      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
+                                         : VecOpIdx * TruncRatio;
+      assert(BitCastNumElts <= UINT32_MAX && "overflow 32-bits");
+
+      Type *BitCastTo = VectorType::get(DestTy, BitCastNumElts);
+      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
+      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
+    }
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/pr45314_be.ll b/llvm/test/Transforms/InstCombine/pr45314_be.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr45314_be.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define i32 @shrinkExtractElt_i64_to_i32_0(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i32_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 1
+; CHECK-NEXT:    ret i32 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i32 0
+  %t = trunc i64 %e to i32
+  ret i32 %t
+}
+
+define i32 @shrinkExtractElt_i64_to_i32_1(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i32_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 3
+; CHECK-NEXT:    ret i32 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i32 1
+  %t = trunc i64 %e to i32
+  ret i32 %t
+}
+
+define i32 @shrinkExtractElt_i64_to_i32_2(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i32_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 5
+; CHECK-NEXT:    ret i32 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i32 2
+  %t = trunc i64 %e to i32
+  ret i32 %t
+}
+
+define i16 @shrinkExtractElt_i64_to_i16_0(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i16_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 3
+; CHECK-NEXT:    ret i16 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i16 0
+  %t = trunc i64 %e to i16
+  ret i16 %t
+}
+
+define i16 @shrinkExtractElt_i64_to_i16_1(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i16_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 7
+; CHECK-NEXT:    ret i16 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i16 1
+  %t = trunc i64 %e to i16
+  ret i16 %t
+}
+
+define i16 @shrinkExtractElt_i64_to_i16_2(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i16_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 11
+; CHECK-NEXT:    ret i16 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i16 2
+  %t = trunc i64 %e to i16
+  ret i16 %t
+}
diff --git a/llvm/test/Transforms/InstCombine/pr45314_le.ll b/llvm/test/Transforms/InstCombine/pr45314_le.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr45314_le.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define i32 @shrinkExtractElt_i64_to_i32_0(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i32_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 0
+; CHECK-NEXT:    ret i32 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i32 0
+  %t = trunc i64 %e to i32
+  ret i32 %t
+}
+
+define i32 @shrinkExtractElt_i64_to_i32_1(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i32_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 2
+; CHECK-NEXT:    ret i32 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i32 1
+  %t = trunc i64 %e to i32
+  ret i32 %t
+}
+
+define i32 @shrinkExtractElt_i64_to_i32_2(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i32_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 4
+; CHECK-NEXT:    ret i32 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i32 2
+  %t = trunc i64 %e to i32
+  ret i32 %t
+}
+
+define i16 @shrinkExtractElt_i64_to_i16_0(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i16_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 0
+; CHECK-NEXT:    ret i16 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i16 0
+  %t = trunc i64 %e to i16
+  ret i16 %t
+}
+
+define i16 @shrinkExtractElt_i64_to_i16_1(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i16_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 4
+; CHECK-NEXT:    ret i16 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i16 1
+  %t = trunc i64 %e to i16
+  ret i16 %t
+}
+
+define i16 @shrinkExtractElt_i64_to_i16_2(<3 x i64> %x) {
+; CHECK-LABEL: @shrinkExtractElt_i64_to_i16_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16>
+; CHECK-NEXT:    [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 8
+; CHECK-NEXT:    ret i16 [[T]]
+;
+  %e = extractelement <3 x i64> %x, i16 2
+  %t = trunc i64 %e to i16
+  ret i16 %t
+}