Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3388,6 +3388,10 @@
     if (N1.getOpcode() == ISD::UNDEF)
       return getUNDEF(VT);
 
+    // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
+    if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+      return getUNDEF(VT);
+
     // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
     // expanding copies of large vectors from registers.
     if (N2C &&
Index: test/CodeGen/X86/extract-store.ll
===================================================================
--- test/CodeGen/X86/extract-store.ll
+++ test/CodeGen/X86/extract-store.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.1 | FileCheck %s -check-prefix=SSE41
-; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
+; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.1 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
 
-define void @pextrb(i8* nocapture %dst, <16 x i8> %foo) {
+; CHECK-LABEL: extract_i8:
+define void @extract_i8(i8* nocapture %dst, <16 x i8> %foo) {
 ; AVX: vpextrb
 ; SSE41: pextrb
 ; AVX-NOT: movb
@@ -11,12 +12,35 @@
   ret void
 }
 
-define void @pextrw(i16* nocapture %dst, <8 x i16> %foo) {
+; CHECK-LABEL: extract_i16:
+define void @extract_i16(i16* nocapture %dst, <8 x i16> %foo) {
 ; AVX: vpextrw
 ; SSE41: pextrw
 ; AVX-NOT: movw
 ; SSE41-NOT: movw
-  %vecext = extractelement <8 x i16> %foo, i32 15
+  %vecext = extractelement <8 x i16> %foo, i32 7
+  store i16 %vecext, i16* %dst, align 1
+  ret void
+}
+
+; CHECK-LABEL: extract_i8_undef:
+define void @extract_i8_undef(i8* nocapture %dst, <16 x i8> %foo) {
+; AVX-NOT: vpextrb
+; SSE41-NOT: pextrb
+; AVX-NOT: movb
+; SSE41-NOT: movb
+  %vecext = extractelement <16 x i8> %foo, i32 16 ; out of bounds for <16 x i8> -> undef
+  store i8 %vecext, i8* %dst, align 1
+  ret void
+}
+
+; CHECK-LABEL: extract_i16_undef:
+define void @extract_i16_undef(i16* nocapture %dst, <8 x i16> %foo) {
+; AVX-NOT: vpextrw
+; SSE41-NOT: pextrw
+; AVX-NOT: movw
+; SSE41-NOT: movw
+  %vecext = extractelement <8 x i16> %foo, i32 9 ; out of bounds for <8 x i16> -> undef
   store i16 %vecext, i16* %dst, align 1
   ret void
 }
Index: test/CodeGen/X86/extractelement-index.ll
===================================================================
--- test/CodeGen/X86/extractelement-index.ll
+++ test/CodeGen/X86/extractelement-index.ll
@@ -1,20 +1,51 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=CHECK -check-prefix=X64
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
 
-; CHECK-LABEL: extractelement_index_i256_1:
-define i8 @extractelement_index_i256_1(<32 x i8> %a) nounwind {
+
+; CHECK-LABEL: extractelement_index_1:
+define i8 @extractelement_index_1(<32 x i8> %a) nounwind {
+  ; X64: movaps
+  ; AVX: vpextrb $1
   %b = extractelement <32 x i8> %a, i256 1
   ret i8 %b
 }
 
-; CHECK-LABEL: extractelement_index_i256_2:
-define i8 @extractelement_index_i256_2(<32 x i8> %a) nounwind {
-  %b = extractelement <32 x i8> %a, i256 60
-  ret i8 %b
+; CHECK-LABEL: extractelement_index_2:
+define i32 @extractelement_index_2(<8 x i32> %a) nounwind {
+  ; X64: pshufd
+  ; AVX: vextractf128 $1
+  ; AVX-NEXT: vpextrd $3
+  %b = extractelement <8 x i32> %a, i64 7
+  ret i32 %b
+}
+
+; CHECK-LABEL: extractelement_index_3:
+define i32 @extractelement_index_3(<8 x i32> %a) nounwind {
+  ; CHECK-NOT: pextr
+  %b = extractelement <8 x i32> %a, i64 15
+  ret i32 %b
 }
 
-; CHECK-LABEL: extractelement_index_i256_3:
-define i8 @extractelement_index_i256_3(<32 x i8> %a, i256 %i) nounwind {
+; CHECK-LABEL: extractelement_index_4:
+define i32 @extractelement_index_4(<8 x i32> %a) nounwind {
+  ; X64: movd
+  ; AVX: vextractf128 $1
+  ; AVX-NEXT: vmovd
+  %b = extractelement <8 x i32> %a, i256 4
+  ret i32 %b
+}
+
+; CHECK-LABEL: extractelement_index_5:
+define i8 @extractelement_index_5(<32 x i8> %a, i256 %i) nounwind {
+  ; X64: movaps
+  ; AVX: vmovaps
   %b = extractelement <32 x i8> %a, i256 %i
   ret i8 %b
 }
 
+; CHECK-LABEL: extractelement_index_6:
+define i8 @extractelement_index_6(<32 x i8> %a) nounwind {
+  ; CHECK-NOT: pextr
+  %b = extractelement <32 x i8> %a, i256 -1
+  ret i8 %b
+}