Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -775,7 +775,16 @@ // TODO: eventually we should lower this intrinsic to IR if (auto CIWidth = dyn_cast(II->getArgOperand(2))) { if (auto CIStart = dyn_cast(II->getArgOperand(3))) { - if (CIWidth->equalsInt(64) && CIStart->isZero()) { + unsigned Index = CIStart->getZExtValue(); + // A value of zero in the field length is defined as length of 64. + unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue(); + + // If the sum of the bit index + length field is greater than 64, the + // results are undefined. + if ((Index + Length) > 64) + return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + + if (Length == 64 && Index == 0) { Value *Vec = II->getArgOperand(1); Value *Undef = UndefValue::get(Vec->getType()); const uint32_t Mask[] = { 0, 2 }; Index: test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- test/Transforms/InstCombine/vec_demanded_elts.ll +++ test/Transforms/InstCombine/vec_demanded_elts.ll @@ -303,6 +303,33 @@ ret <2 x i64> %2 } +; CHECK: define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> %i + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0) + ret <2 x i64> %1 +} + +; CHECK: define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> undef + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16) + ret <2 x i64> %1 +} + +; CHECK: define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> undef + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32) + ret <2 x i64> %1 +} + +; CHECK: define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) +define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) { +; CHECK: ret <2 x i64> undef + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16) + ret <2 x i64> %1 +} ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind