Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -9031,10 +9031,14 @@ case X86::BI__builtin_ia32_vec_ext_v32qi: case X86::BI__builtin_ia32_vec_ext_v16hi: case X86::BI__builtin_ia32_vec_ext_v8si: - case X86::BI__builtin_ia32_vec_ext_v4di: + case X86::BI__builtin_ia32_vec_ext_v4di: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + uint64_t Index = cast(Ops[1])->getZExtValue(); + Index &= NumElts - 1; // These builtins exist so we can ensure the index is an ICE and in range. // Otherwise we could just do this in the header file. - return Builder.CreateExtractElement(Ops[0], Ops[1]); + return Builder.CreateExtractElement(Ops[0], Index); + } case X86::BI__builtin_ia32_vec_set_v16qi: case X86::BI__builtin_ia32_vec_set_v8hi: case X86::BI__builtin_ia32_vec_set_v4si: @@ -9042,10 +9046,14 @@ case X86::BI__builtin_ia32_vec_set_v32qi: case X86::BI__builtin_ia32_vec_set_v16hi: case X86::BI__builtin_ia32_vec_set_v8si: - case X86::BI__builtin_ia32_vec_set_v4di: + case X86::BI__builtin_ia32_vec_set_v4di: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned Index = cast(Ops[2])->getZExtValue(); + Index &= NumElts - 1; // These builtins exist so we can ensure the index is an ICE and in range. // Otherwise we could just do this in the header file. - return Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); + return Builder.CreateInsertElement(Ops[0], Ops[1], Index); + } case X86::BI_mm_setcsr: case X86::BI__builtin_ia32_ldmxcsr: { Address Tmp = CreateMemTemp(E->getArg(0)->getType()); @@ -9311,8 +9319,7 @@ // extract (0, 1) unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; - llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); - Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); + Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract"); // cast pointer to i64 & store Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); @@ -9336,7 +9343,12 @@ case X86::BI__builtin_ia32_extracti64x2_512_mask: { llvm::Type *DstTy = ConvertType(E->getType()); unsigned NumElts = DstTy->getVectorNumElements(); - unsigned Index = cast(Ops[1])->getZExtValue() * NumElts; + unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned SubVectors = SrcNumElts / NumElts; + unsigned Index = cast(Ops[1])->getZExtValue(); + assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); + Index &= SubVectors - 1; // Remove any extra bits. + Index *= NumElts; uint32_t Indices[16]; for (unsigned i = 0; i != NumElts; ++i) @@ -9370,7 +9382,11 @@ case X86::BI__builtin_ia32_inserti64x2_512: { unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements(); unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements(); - unsigned Index = cast(Ops[2])->getZExtValue() * SrcNumElts; + unsigned SubVectors = DstNumElts / SrcNumElts; + unsigned Index = cast(Ops[2])->getZExtValue(); + assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); + Index &= SubVectors - 1; // Remove any extra bits. + Index *= SrcNumElts; uint32_t Indices[16]; for (unsigned i = 0; i != DstNumElts; ++i) @@ -9571,7 +9587,7 @@ case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: case X86::BI__builtin_ia32_palignr512: { - unsigned ShiftVal = cast(Ops[2])->getZExtValue(); + unsigned ShiftVal = cast(Ops[2])->getZExtValue() & 0xff; unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); assert(NumElts % 16 == 0); @@ -9611,7 +9627,7 @@ case X86::BI__builtin_ia32_alignq256: case X86::BI__builtin_ia32_alignq512: { unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); - unsigned ShiftVal = cast(Ops[2])->getZExtValue(); + unsigned ShiftVal = cast(Ops[2])->getZExtValue() & 0xff; // Mask the shift amount to width of two vectors. ShiftVal &= (2 * NumElts) - 1; @@ -9696,7 +9712,7 @@ case X86::BI__builtin_ia32_pslldqi128_byteshift: case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: { - unsigned ShiftVal = cast(Ops[1])->getZExtValue(); + unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff; llvm::Type *ResultType = Ops[0]->getType(); // Builtin type is vXi64 so multiply by 8 to get bytes. unsigned NumElts = ResultType->getVectorNumElements() * 8; @@ -9726,7 +9742,7 @@ case X86::BI__builtin_ia32_psrldqi128_byteshift: case X86::BI__builtin_ia32_psrldqi256_byteshift: case X86::BI__builtin_ia32_psrldqi512_byteshift: { - unsigned ShiftVal = cast(Ops[1])->getZExtValue(); + unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff; llvm::Type *ResultType = Ops[0]->getType(); // Builtin type is vXi64 so multiply by 8 to get bytes. unsigned NumElts = ResultType->getVectorNumElements() * 8; @@ -10170,7 +10186,7 @@ case X86::BI__builtin_ia32_cmpps256: case X86::BI__builtin_ia32_cmppd: case X86::BI__builtin_ia32_cmppd256: { - unsigned CC = cast(Ops[2])->getZExtValue(); + unsigned CC = cast(Ops[2])->getZExtValue() & 0x1f; // If this one of the SSE immediates, we can use native IR. if (CC < 8) { FCmpInst::Predicate Pred; Index: test/CodeGen/avx-builtins.c =================================================================== --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -316,27 +316,27 @@ int test_mm256_extract_epi8(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi8 - // CHECK: extractelement <32 x i8> %{{.*}}, i32 31 + // CHECK: extractelement <32 x i8> %{{.*}}, {{i32|i64}} 31 // CHECK: zext i8 %{{.*}} to i32 return _mm256_extract_epi8(A, 31); } int test_mm256_extract_epi16(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi16 - // CHECK: extractelement <16 x i16> %{{.*}}, i32 15 + // CHECK: extractelement <16 x i16> %{{.*}}, {{i32|i64}} 15 // CHECK: zext i16 %{{.*}} to i32 return _mm256_extract_epi16(A, 15); } int test_mm256_extract_epi32(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi32 - // CHECK: extractelement <8 x i32> %{{.*}}, i32 7 + // CHECK: extractelement <8 x i32> %{{.*}}, {{i32|i64}} 7 return _mm256_extract_epi32(A, 7); } long long test_mm256_extract_epi64(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi64 - // CHECK: extractelement <4 x i64> %{{.*}}, i32 3 + // CHECK: extractelement <4 x i64> %{{.*}}, {{i32|i64}} 3 return _mm256_extract_epi64(A, 3); } @@ -396,25 +396,25 @@ __m256i test_mm256_insert_epi8(__m256i x, char b) { // CHECK-LABEL: test_mm256_insert_epi8 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 14 return _mm256_insert_epi8(x, b, 14); } __m256i test_mm256_insert_epi16(__m256i x, int b) { // CHECK-LABEL: test_mm256_insert_epi16 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 4 + // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, {{i32|i64}} 4 return _mm256_insert_epi16(x, b, 4); } __m256i test_mm256_insert_epi32(__m256i x, int b) { // CHECK-LABEL: test_mm256_insert_epi32 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 5 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 5 return _mm256_insert_epi32(x, b, 5); } __m256i test_mm256_insert_epi64(__m256i x, long long b) { // CHECK-LABEL: test_mm256_insert_epi64 - // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 2 + // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 2 return _mm256_insert_epi64(x, b, 2); } Index: test/CodeGen/sse2-builtins.c =================================================================== --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -613,14 +613,14 @@ // Lowering to pextrw requires optimization. int test_mm_extract_epi16(__m128i A) { // CHECK-LABEL: test_mm_extract_epi16 - // CHECK: extractelement <8 x i16> %{{.*}}, i32 1 + // CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1 // CHECK: zext i16 %{{.*}} to i32 return _mm_extract_epi16(A, 1); } __m128i test_mm_insert_epi16(__m128i A, int B) { // CHECK-LABEL: test_mm_insert_epi16 - // CHECK: insertelement <8 x i16> %{{.*}}, i32 0 + // CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0 return _mm_insert_epi16(A, B, 0); } Index: test/CodeGen/sse41-builtins.c =================================================================== --- test/CodeGen/sse41-builtins.c +++ test/CodeGen/sse41-builtins.c @@ -171,26 +171,26 @@ int test_mm_extract_epi8(__m128i x) { // CHECK-LABEL: test_mm_extract_epi8 - // CHECK: extractelement <16 x i8> %{{.*}}, i32 1 + // CHECK: extractelement <16 x i8> %{{.*}}, {{i32|i64}} 1 // CHECK: zext i8 %{{.*}} to i32 return _mm_extract_epi8(x, 1); } int test_mm_extract_epi32(__m128i x) { // CHECK-LABEL: test_mm_extract_epi32 - // CHECK: extractelement <4 x i32> %{{.*}}, i32 1 + // CHECK: extractelement <4 x i32> %{{.*}}, {{i32|i64}} 1 return _mm_extract_epi32(x, 1); } long long test_mm_extract_epi64(__m128i x) { // CHECK-LABEL: test_mm_extract_epi64 - // CHECK: extractelement <2 x i64> %{{.*}}, i32 1 + // CHECK: extractelement <2 x i64> %{{.*}}, {{i32|i64}} 1 return _mm_extract_epi64(x, 1); } int test_mm_extract_ps(__m128 x) { // CHECK-LABEL: test_mm_extract_ps - // CHECK: extractelement <4 x float> %{{.*}}, i32 1 + // CHECK: extractelement <4 x float> %{{.*}}, {{i32|i64}} 1 return _mm_extract_ps(x, 1); } @@ -220,19 +220,19 @@ __m128i test_mm_insert_epi8(__m128i x, char b) { // CHECK-LABEL: test_mm_insert_epi8 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 1 return _mm_insert_epi8(x, b, 1); } __m128i test_mm_insert_epi32(__m128i x, int b) { // CHECK-LABEL: test_mm_insert_epi32 - // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 1 return _mm_insert_epi32(x, b, 1); } __m128i test_mm_insert_epi64(__m128i x, long long b) { // CHECK-LABEL: test_mm_insert_epi64 - // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 + // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 1 return _mm_insert_epi64(x, b, 1); } Index: test/CodeGen/vector.c =================================================================== --- test/CodeGen/vector.c +++ test/CodeGen/vector.c @@ -45,13 +45,13 @@ unsigned long test_epi8(__m128i x) { return _mm_extract_epi8(x, 4); } // CHECK: @test_epi8 -// CHECK: extractelement <16 x i8> {{.*}}, i32 4 +// CHECK: extractelement <16 x i8> {{.*}}, {{i32|i64}} 4 // CHECK: zext i8 {{.*}} to i32 unsigned long test_epi16(__m128i x) { return _mm_extract_epi16(x, 3); } // CHECK: @test_epi16 -// CHECK: extractelement <8 x i16> {{.*}}, i32 3 +// CHECK: extractelement <8 x i16> {{.*}}, {{i32|i64}} 3 // CHECK: zext i16 {{.*}} to i32 void extractinttypes() {