Index: lib/Analysis/DemandedBits.cpp
===================================================================
--- lib/Analysis/DemandedBits.cpp
+++ lib/Analysis/DemandedBits.cpp
@@ -142,6 +142,27 @@
                  std::min(BitWidth, Known.countMaxTrailingZeros()+1));
         }
         break;
+      case Intrinsic::fshl:
+      case Intrinsic::fshr:
+        if (OperandNo == 2) {
+          // Shift amount is modulo the bitwidth. For powers of two we have
+          // SA % BW == SA & (BW - 1).
+          if (isPowerOf2_32(BitWidth))
+            AB = BitWidth - 1;
+        } else if (auto *SA = dyn_cast<ConstantInt>(II->getOperand(2))) {
+          // TODO: Support vectors.
+          // Normalize to funnel shift left. APInt shifts of BitWidth are well-
+          // defined, so no need to special-case zero shifts here.
+          uint64_t ShiftAmt = SA->getValue().urem(BitWidth);
+          if (II->getIntrinsicID() == Intrinsic::fshr)
+            ShiftAmt = BitWidth - ShiftAmt;
+
+          if (OperandNo == 0)
+            AB = AOut.lshr(ShiftAmt);
+          else if (OperandNo == 1)
+            AB = AOut.shl(BitWidth - ShiftAmt);
+        }
+        break;
       }
     break;
   case Instruction::Add:
Index: test/Analysis/DemandedBits/intrinsics.ll
===================================================================
--- test/Analysis/DemandedBits/intrinsics.ll
+++ test/Analysis/DemandedBits/intrinsics.ll
@@ -23,3 +23,82 @@
 }
 declare i32 @llvm.bitreverse.i32(i32)
 
+; Funnel shifts
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i33 @llvm.fshr.i33(i33, i33, i33)
+
+; CHECK-DAG: DemandedBits: 0xff for %x2 = or i32 %x, 1
+; CHECK-DAG: DemandedBits: 0xff000000 for %y2 = or i32 %y, 1
+; CHECK-DAG: DemandedBits: 0xffff for %z = call i32 @llvm.fshl.i32(i32 %x2, i32 %y2, i32 8)
+; CHECK-DAG: DemandedBits: 0xffffffff for %r = and i32 %z, 65535
+define i32 @test_fshl(i32 %x, i32 %y) {
+  %x2 = or i32 %x, 1
+  %y2 = or i32 %y, 1
+  %z = call i32 @llvm.fshl.i32(i32 %x2, i32 %y2, i32 8)
+  %r = and i32 %z, 65535
+  ret i32 %r
+}
+
+; CHECK-DAG: DemandedBits: 0xff for %x2 = or i33 %x, 1
+; CHECK-DAG: DemandedBits: 0x1fe000000 for %y2 = or i33 %y, 1
+; CHECK-DAG: DemandedBits: 0xffff for %z = call i33 @llvm.fshr.i33(i33 %x2, i33 %y2, i33 25)
+; CHECK-DAG: DemandedBits: 0x1ffffffff for %r = and i33 %z, 65535
+define i33 @test_fshr(i33 %x, i33 %y) {
+  %x2 = or i33 %x, 1
+  %y2 = or i33 %y, 1
+  %z = call i33 @llvm.fshr.i33(i33 %x2, i33 %y2, i33 25)
+  %r = and i33 %z, 65535
+  ret i33 %r
+}
+
+; CHECK-DAG: DemandedBits: 0xffff for %x2 = or i32 %x, 1
+; CHECK-DAG: DemandedBits: 0x0 for %y2 = or i32 %y, 1
+; CHECK-DAG: DemandedBits: 0xffff for %z = call i32 @llvm.fshl.i32(i32 %x2, i32 %y2, i32 0)
+; CHECK-DAG: DemandedBits: 0xffffffff for %r = and i32 %z, 65535
+define i32 @test_fshl_zero_shift(i32 %x, i32 %y) {
+  %x2 = or i32 %x, 1
+  %y2 = or i32 %y, 1
+  %z = call i32 @llvm.fshl.i32(i32 %x2, i32 %y2, i32 0)
+  %r = and i32 %z, 65535
+  ret i32 %r
+}
+
+; CHECK-DAG: DemandedBits: 0x0 for %x2 = or i33 %x, 1
+; CHECK-DAG: DemandedBits: 0xffff for %y2 = or i33 %y, 1
+; CHECK-DAG: DemandedBits: 0xffff for %z = call i33 @llvm.fshr.i33(i33 %x2, i33 %y2, i33 33)
+; CHECK-DAG: DemandedBits: 0x1ffffffff for %r = and i33 %z, 65535
+define i33 @test_fshr_full_shift(i33 %x, i33 %y) {
+  %x2 = or i33 %x, 1
+  %y2 = or i33 %y, 1
+  %z = call i33 @llvm.fshr.i33(i33 %x2, i33 %y2, i33 33)
+  %r = and i33 %z, 65535
+  ret i33 %r
+}
+
+; CHECK-DAG: DemandedBits: 0xffffffff for %x2 = or i32 %x, 1
+; CHECK-DAG: DemandedBits: 0xffffffff for %y2 = or i32 %y, 1
+; CHECK-DAG: DemandedBits: 0x1f for %z2 = or i32 %z, 1
+; CHECK-DAG: DemandedBits: 0xffff for %f = call i32 @llvm.fshl.i32(i32 %x2, i32 %y2, i32 %z2)
+; CHECK-DAG: DemandedBits: 0xffffffff for %r = and i32 %f, 65535
+define i32 @test_fshl_pow2_bitwidth(i32 %x, i32 %y, i32 %z) {
+  %x2 = or i32 %x, 1
+  %y2 = or i32 %y, 1
+  %z2 = or i32 %z, 1
+  %f = call i32 @llvm.fshl.i32(i32 %x2, i32 %y2, i32 %z2)
+  %r = and i32 %f, 65535
+  ret i32 %r
+}
+
+; CHECK-DAG: DemandedBits: 0x1ffffffff for %x2 = or i33 %x, 1
+; CHECK-DAG: DemandedBits: 0x1ffffffff for %y2 = or i33 %y, 1
+; CHECK-DAG: DemandedBits: 0x1ffffffff for %z2 = or i33 %z, 1
+; CHECK-DAG: DemandedBits: 0xffff for %f = call i33 @llvm.fshr.i33(i33 %x2, i33 %y2, i33 %z2)
+; CHECK-DAG: DemandedBits: 0x1ffffffff for %r = and i33 %f, 65535
+define i33 @test_fshr_non_pow2_bitwidth(i33 %x, i33 %y, i33 %z) {
+  %x2 = or i33 %x, 1
+  %y2 = or i33 %y, 1
+  %z2 = or i33 %z, 1
+  %f = call i33 @llvm.fshr.i33(i33 %x2, i33 %y2, i33 %z2)
+  %r = and i33 %f, 65535
+  ret i33 %r
+}