diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9957,9 +9957,13 @@
   // Adjust IndicesVec to match VT size.
   assert(IndicesVec.getValueType().getVectorNumElements() >= NumElts &&
          "Illegal variable permute mask size");
-  if (IndicesVec.getValueType().getVectorNumElements() > NumElts)
-    IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
-                                  NumElts * VT.getScalarSizeInBits());
+  if (IndicesVec.getValueType().getVectorNumElements() > NumElts) {
+    if (IndicesVec.getValueSizeInBits() == SizeInBits)
+      IndicesVec = DAG.getBitcast(IndicesVT, IndicesVec);
+    else
+      IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
+                                    NumElts * VT.getScalarSizeInBits());
+  }
   IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
 
   // Handle SrcVec that don't match VT type.
diff --git a/llvm/test/CodeGen/X86/vector-extract-bitcast.ll b/llvm/test/CodeGen/X86/vector-extract-bitcast.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vector-extract-bitcast.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+define void @_Z1fv() local_unnamed_addr {
+; CHECK-LABEL: _Z1fv:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; CHECK-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    vmovapd (%rax), %xmm1
+; CHECK-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpermilpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    vmovupd %xmm0, (%rax)
+; CHECK-NEXT:    retq
+bb:
+  %0 = load <4 x i64>, <4 x i64>* undef, align 32
+  %1 = bitcast <4 x i64> %0 to <8 x i32>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %3 = and <2 x i32> %2, <i32 1, i32 1>
+  %4 = extractelement <2 x i32> %3, i32 0
+  %vecext.i8.1 = extractelement <4 x i64> %0, i32 %4
+  %5 = extractelement <2 x i32> %3, i32 1
+  %vecext.i8.2 = extractelement <4 x i64> %0, i32 %5
+  %6 = insertelement <2 x i64> poison, i64 %vecext.i8.1, i32 0
+  %7 = insertelement <2 x i64> %6, i64 %vecext.i8.2, i32 1
+  %8 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> %7
+  store <2 x i64> %8, <2 x i64>* undef, align 8
+  ret void
+}
+