This patch extends induction variable widening to handle "sub nsw" and "mul nsw" instructions. Currently only "add nsw" instructions are widened.
This patch eliminates a large number of "sext" instructions in 64-bit code (and the corresponding target code) in cases like:
int N = 100;
float **A;
void foo(int x0, int x1)
{
        float * A_cur = &A[0][0];
        float * A_next = &A[1][0];
        for(int x = x0; x < x1; ++x)
        {
          // Currently only the [x + N] case is widened; the other two cases lead to sext.
          // This patch fixes that, so none of the three cases needs a sext.
          const float div = A_cur[x + N] + A_cur[x - N] + A_cur[x * N];
          A_next[x] = div;
        }
}
...
> clang++ test.cpp -march=core-avx2 -Ofast  -fno-unroll-loops -fno-tree-vectorize -S -o -
With my patch:
.LBB0_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        vmovss  (%rdi,%rcx,4), %xmm0
        vaddss  (%rdx,%rcx,4), %xmm0, %xmm0
        vaddss  (%rax), %xmm0, %xmm0
        vmovss  %xmm0, (%r8,%rcx,4)
        incq    %rcx
        addq    %r9, %rax
        cmpl    %esi, %ecx
        jl      .LBB0_2
vs trunk:
.LBB0_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        vmovss  (%r10,%rcx,4), %xmm0
        leal    (%r11,%rcx), %edx
        movslq  %edx, %rdx
        vaddss  (%rax,%rdx,4), %xmm0, %xmm0
        movslq  %edi, %rdi
        vaddss  (%rax,%rdi,4), %xmm0, %xmm0
        vmovss  %xmm0, (%r8,%rcx,4)
        incq    %rcx
        addl    %r9d, %edi
        cmpl    %esi, %ecx
        jl      .LBB0_2