Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -576,6 +576,25 @@
       break;
     }
 
+  case Intrinsic::x86_sse4a_insertqi: {
+    // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
+    // ones undef
+    if (ConstantInt *Width = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+      if (ConstantInt *Start = dyn_cast<ConstantInt>(II->getArgOperand(3)))
+        if (Width->equalsInt(64) && Start->isZero()) {
+          Value *Vec = II->getArgOperand(1);
+          Value *Undef = UndefValue::get(Vec->getType());
+          // Lane 0 takes Vec[0]; index 2 selects Undef[0] for lane 1.
+          const uint32_t Mask[] = { 0, 2 };
+          return ReplaceInstUsesWith(
+              CI,
+              Builder->CreateShuffleVector(
+                  Vec, Undef, ConstantDataVector::get(
+                                  II->getContext(), ArrayRef<uint32_t>(Mask))));
+        }
+    break;
+  }
+
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
Index: test/Transforms/InstCombine/2014-04-11-SSE4a-insertqi-64-bits.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/2014-04-11-SSE4a-insertqi-64-bits.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: insertqi
+
+define <2 x i64> @test(<2 x i64> %v, <2 x i64> %i) #0 {
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
+  ret <2 x i64> %1
+}
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
+