Added codegen for scan directives in parallel for regions.
Emits the code for the directive with inscan reductions.
Original code:
#pragma omp for simd reduction(inscan, op : ...)
for(...) {
<input phase>;
#pragma omp scan (in)exclusive(...)
<scan phase>
}
is transformed to something like:
size num_iters = <num_iters>;
<type> buffer[num_iters];
#pragma omp for simd
for (i: 0..<num_iters>) {
<input phase>;
buffer[i] = red;
}
#pragma omp barrier
for (int k = 0; k != ceil(log2(num_iters)); ++k)
for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
buffer[cnt] op= buffer[cnt-pow(2,k)];
#pragma omp for simd
for (i: 0..<num_iters>) {
red = InclusiveScan ? buffer[i] : buffer[i-1];
<scan phase>;
}