Index: llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -998,5 +998,18 @@
         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
       }
+      if (MaskC->getValue().isShiftedMask()) {
+        // Any single contiguous sequence of 1s anywhere in the mask simply
+        // describes a subset of the input bits shifted to the appropriate
+        // position. Replace with the straightforward IR.
+        unsigned ShiftAmount = MaskC->getValue().countTrailingZeros();
+        Value *Input = II.getArgOperand(0);
+        Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1));
+        Value *Shifted = IC.Builder.CreateLShr(Masked,
+                                               ConstantInt::get(II.getType(),
+                                                                ShiftAmount));
+        return IC.replaceInstUsesWith(II, Shifted);
+      }
+
       if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
         uint64_t Src = SrcC->getZExtValue();
         uint64_t Mask = MaskC->getZExtValue();
@@ -1029,6 +1042,18 @@
       if (MaskC->isAllOnesValue()) {
         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
       }
+      if (MaskC->getValue().isShiftedMask()) {
+        // Any single contiguous sequence of 1s anywhere in the mask simply
+        // describes a subset of the input bits shifted to the appropriate
+        // position. Replace with the straightforward IR.
+        unsigned ShiftAmount = MaskC->getValue().countTrailingZeros();
+        Value *Input = II.getArgOperand(0);
+        Value *Shifted = IC.Builder.CreateShl(Input,
+                                              ConstantInt::get(II.getType(),
+                                                               ShiftAmount));
+        Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1));
+        return IC.replaceInstUsesWith(II, Masked);
+      }
 
       if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
         uint64_t Src = SrcC->getZExtValue();
Index: llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
===================================================================
--- llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
+++ llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
@@ -306,6 +306,26 @@
   ret i64 %1
 }
 
+define i32 @test_x86_pext_32_shifted_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_shifted_mask(
+; CHECK-NEXT:    %1 = lshr i32 %x, 1
+; CHECK-NEXT:    %2 = and i32 %1, 3
+; CHECK-NEXT:    ret i32 %2
+;
+  %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 6)
+  ret i32 %1
+}
+
+define i64 @test_x86_pext_64_shifted_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_shifted_mask(
+; CHECK-NEXT:    %1 = lshr i64 %x, 1
+; CHECK-NEXT:    %2 = and i64 %1, 3
+; CHECK-NEXT:    ret i64 %2
+;
+  %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 6)
+  ret i64 %1
+}
+
 define i32 @test_x86_pext_32_constant_fold() nounwind readnone {
 ; CHECK-LABEL: @test_x86_pext_32_constant_fold(
 ; CHECK-NEXT:    ret i32 30001
@@ -370,6 +390,26 @@
   ret i64 %1
 }
 
+define i32 @test_x86_pdep_32_shifted_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_shifted_mask(
+; CHECK-NEXT:    %1 = shl i32 %x, 2
+; CHECK-NEXT:    %2 = and i32 %1, 12
+; CHECK-NEXT:    ret i32 %2
+;
+  %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 12)
+  ret i32 %1
+}
+
+define i64 @test_x86_pdep_64_shifted_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_shifted_mask(
+; CHECK-NEXT:    %1 = shl i64 %x, 2
+; CHECK-NEXT:    %2 = and i64 %1, 12
+; CHECK-NEXT:    ret i64 %2
+;
+  %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 12)
+  ret i64 %1
+}
+
 define i32 @test_x86_pdep_32_constant_fold() nounwind readnone {
 ; CHECK-LABEL: @test_x86_pdep_32_constant_fold(
 ; CHECK-NEXT:    ret i32 807407616
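
Note (reviewer aid, not part of the patch): the identity both folds rely on
is easy to spot-check outside of InstCombine. The standalone sketch below is
hypothetical; the file name is made up, but _pext_u32 and _pdep_u32 are the
real BMI2 intrinsics from <immintrin.h>. It assumes a BMI2-capable host and
builds with, e.g., clang++ -mbmi2 pext_pdep_check.cpp.

// For any contiguous ("shifted") mask m, with shift = countTrailingZeros(m):
//   pext(x, m) == (x & m) >> shift   (the pext fold above)
//   pdep(x, m) == (x << shift) & m   (the pdep fold above)
#include <cassert>
#include <cstdint>
#include <immintrin.h>
#include <initializer_list>

int main() {
  for (uint32_t Mask : {6u, 12u, 0x00ff0000u}) { // all shifted masks
    unsigned Shift = __builtin_ctz(Mask);        // == countTrailingZeros
    for (uint32_t X : {0u, 1u, 0xdeadbeefu, ~0u}) {
      assert(_pext_u32(X, Mask) == ((X & Mask) >> Shift));
      assert(_pdep_u32(X, Mask) == ((X << Shift) & Mask));
    }
  }
  return 0;
}

The 64-bit intrinsics (_pext_u64/_pdep_u64) satisfy the same identity, which
is why the 32- and 64-bit InstCombine cases can share the logic above.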