Add mul to the list of ops that we canonicalize with a select to expose an identity merge
@lebedev.ri I'm not familiar with the midpoint vector tests - your comments on rGc38831e11dc33d2a83 suggest the vector tests were just for completeness, but the simplification in codegen suggests there might be additional missing DAG folds?
@lebedev.ri This code now optimizes considerably in IR: https://llvm.godbolt.org/z/edhMY1crG
Should these test cases be updated to match?
; Lane-wise on <16 x i8>: returns %a1 plus or minus half of the signed
; distance between %a1 and the vector loaded from %a2_addr. The half-distance
; is subtracted (via negation) in lanes where the loaded value is less than
; %a1, and added otherwise.
define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, <16 x i8>* nocapture readonly %a2_addr) {
  ; Second operand comes from memory.
  %a2 = load <16 x i8>, <16 x i8>* %a2_addr, align 16

  ; Per-lane mask: true where the loaded value is signed-less-than %a1.
  %t3 = icmp slt <16 x i8> %a2, %a1

  ; Order the two operands per lane so the difference below is max - min.
  %lo = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a2, <16 x i8> %a1)
  %hi = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a2, <16 x i8> %a1)
  %t7 = sub <16 x i8> %hi, %lo

  ; Halve the difference with a logical shift right by one in every lane.
  %t8 = lshr <16 x i8> %t7, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>

  ; Pick the negated half-difference where the mask is set, the plain one otherwise.
  %neg = sub nsw <16 x i8> zeroinitializer, %t8
  %step = select <16 x i1> %t3, <16 x i8> %neg, <16 x i8> %t8

  ; Offset %a1 by the signed half-difference.
  %a10 = add nsw <16 x i8> %step, %a1
  ret <16 x i8> %a10
}

declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)