diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2487,6 +2487,76 @@
       // TODO should we convert this to an AND if the RHS is constant?
     }
     break;
+  case Intrinsic::x86_bmi_pext_32:
+  case Intrinsic::x86_bmi_pext_64:
+    // Simplify pext when the mask (operand 1) is a constant.
+    if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+      // A zero mask selects no bits, so the result is zero.
+      if (MaskC->isNullValue())
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+      // An all-ones mask keeps every bit in place, yielding the source.
+      if (MaskC->isAllOnesValue())
+        return replaceInstUsesWith(CI, II->getArgOperand(0));
+
+      // Both operands are constant: evaluate the extract at compile time.
+      if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+        uint64_t Src = SrcC->getZExtValue();
+        uint64_t Mask = MaskC->getZExtValue();
+        uint64_t Result = 0;
+        uint64_t BitToSet = 1;
+
+        // Visit the mask's set bits from lowest to highest; the source bit
+        // under the k-th set mask bit is copied to result bit k.
+        while (Mask) {
+          // Isolate lowest set bit.
+          uint64_t BitToTest = Mask & -Mask;
+          if (BitToTest & Src)
+            Result |= BitToSet;
+
+          BitToSet <<= 1;
+          // Clear lowest set bit.
+          Mask &= Mask - 1;
+        }
+
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
+      }
+    }
+    break;
+  case Intrinsic::x86_bmi_pdep_32:
+  case Intrinsic::x86_bmi_pdep_64:
+    // Simplify pdep when the mask (operand 1) is a constant.
+    if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+      // A zero mask has no destination bits, so the result is zero.
+      if (MaskC->isNullValue())
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+      // An all-ones mask deposits every bit in place, yielding the source.
+      if (MaskC->isAllOnesValue())
+        return replaceInstUsesWith(CI, II->getArgOperand(0));
+
+      // Both operands are constant: evaluate the deposit at compile time.
+      if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+        uint64_t Src = SrcC->getZExtValue();
+        uint64_t Mask = MaskC->getZExtValue();
+        uint64_t Result = 0;
+        uint64_t BitToTest = 1;
+
+        // Visit the mask's set bits from lowest to highest; source bit k is
+        // copied to the position of the k-th set mask bit.
+        while (Mask) {
+          // Isolate lowest set bit.
+          uint64_t BitToSet = Mask & -Mask;
+          if (BitToTest & Src)
+            Result |= BitToSet;
+
+          BitToTest <<= 1;
+          // Clear lowest set bit.
+          Mask &= Mask - 1;
+        }
+
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
+      }
+    }
+    break;
 
   case Intrinsic::x86_vcvtph2ps_128:
   case Intrinsic::x86_vcvtph2ps_256: {
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll b/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
--- a/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
@@ -7,6 +7,10 @@
 declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
 declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
 declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
+declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.bmi.pext.64(i64, i64) nounwind readnone
+declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.bmi.pdep.64(i64, i64) nounwind readnone
 
 define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
 ; CHECK-LABEL: @test_x86_tbm_bextri_u32(
@@ -269,3 +273,131 @@
   %1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 5, i64 1)
   ret i64 %1
 }
+
+define i32 @test_x86_pext_32_zero_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_zero_mask(
+; CHECK-NEXT:    ret i32 0
+;
+  %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 0)
+  ret i32 %1
+}
+
+define i64 @test_x86_pext_64_zero_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_zero_mask(
+; CHECK-NEXT:    ret i64 0
+;
+  %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 0)
+  ret i64 %1
+}
+
+define i32 @test_x86_pext_32_allones_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_allones_mask(
+; CHECK-NEXT:    ret i32 %x
+;
+  %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 -1)
+  ret i32 %1
+}
+
+define i64 @test_x86_pext_64_allones_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_allones_mask(
+; CHECK-NEXT:    ret i64 %x
+;
+  %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 -1)
+  ret i64 %1
+}
+
+define i32 @test_x86_pext_32_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_constant_fold(
+; CHECK-NEXT:    ret i32 30001
+;
+  %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 1985229328, i32 4042322160)
+  ret i32 %1
+}
+
+define i64 @test_x86_pext_64_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_constant_fold(
+; CHECK-NEXT:    ret i64 1966210489
+;
+  %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 8526495043095935640, i64 -1085102592571150096)
+  ret i64 %1
+}
+
+define i32 @test_x86_pext_32_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_constant_fold_2(
+; CHECK-NEXT:    ret i32 30224
+;
+  %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 1985229328, i32 4278190335)
+  ret i32 %1
+}
+
+define i64 @test_x86_pext_64_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_constant_fold_2(
+; CHECK-NEXT:    ret i64 1980816570
+;
+  %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 8526495043095935640, i64 -72056498804490496)
+  ret i64 %1
+}
+
+define i32 @test_x86_pdep_32_zero_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_zero_mask(
+; CHECK-NEXT:    ret i32 0
+;
+  %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 0)
+  ret i32 %1
+}
+
+define i64 @test_x86_pdep_64_zero_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_zero_mask(
+; CHECK-NEXT:    ret i64 0
+;
+  %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 0)
+  ret i64 %1
+}
+
+define i32 @test_x86_pdep_32_allones_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_allones_mask(
+; CHECK-NEXT:    ret i32 %x
+;
+  %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 -1)
+  ret i32 %1
+}
+
+define i64 @test_x86_pdep_64_allones_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_allones_mask(
+; CHECK-NEXT:    ret i64 %x
+;
+  %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 -1)
+  ret i64 %1
+}
+
+define i32 @test_x86_pdep_32_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_constant_fold(
+; CHECK-NEXT:    ret i32 807407616
+;
+  %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 1985229328, i32 4042322160)
+  ret i32 %1
+}
+
+define i64 @test_x86_pdep_64_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_constant_fold(
+; CHECK-NEXT:    ret i64 -1089641583808049024
+;
+  %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 8526495043095935640, i64 -1085102592571150096)
+  ret i64 %1
+}
+
+define i32 @test_x86_pdep_32_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_constant_fold_2(
+; CHECK-NEXT:    ret i32 838860816
+;
+  %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 1985229328, i32 4278190335)
+  ret i32 %1
+}
+
+define i64 @test_x86_pdep_64_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_constant_fold_2(
+; CHECK-NEXT:    ret i64 -144114243170822144
+;
+  %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 8526495043095935640, i64 -72056498804490496)
+  ret i64 %1
+}