For the C code below, we can use VNNI to combine the multiply and add operations.
// Example reduction: adds to `c` the products of corresponding elements of
// `a` and `b`, each converted to int before multiplying, and returns the sum.
//   a - unsigned char elements; indices 0..31 are read
//   b - char elements; indices 0..31 are read
//   c - initial accumulator value
//   n - not used by the body; the trip count is the constant 32
int usdot_prod_qi(unsigned char *restrict a, char *restrict b, int c, int n) {
int i;
// Fixed trip count of 32, independent of `n`.
for (i = 0; i < 32; i++) {
// Both operands are cast to int first, so the multiply is done in int.
c += ((int)a[i] * (int)b[i]);
}
return c;}
This patch does not support combining across basic blocks.
clang-format: please reformat the code
- auto IsFreeTruncation = - [](SDValue &Op) -> bool { - if ((Op.getOpcode() == ISD::ZERO_EXTEND || - Op.getOpcode() == ISD::SIGN_EXTEND) && - Op.getOperand(0).getValueType().getScalarSizeInBits() <= 8) - return true; + auto IsFreeTruncation = [](SDValue &Op) -> bool { + if ((Op.getOpcode() == ISD::ZERO_EXTEND || + Op.getOpcode() == ISD::SIGN_EXTEND) && + Op.getOperand(0).getValueType().getScalarSizeInBits() <= 8) + return true;