//    common multiple chains for the loads/stores with the same offsets in the
//    loop, so that we can reuse the offsets and reduce the register pressure in
//    the loop. This transformation can also increase the loop ILP, as each chain
//    now uses its own loop induction add/addi. However, this will increase the
//    number of add/addi instructions in the loop.
//
//    char *p;                                                                  
//    A1 = p + base1                                                                                                                                   
//    A2 = p + base1 + offset                                                   
//    B1 = p + base2                                                            
//    B2 = p + base2 + offset                                                   
//                                                                              
//    for (int i = 0; i < n; i++) {
//      unsigned long x1 = *(unsigned long *)(A1 + i);
//      unsigned long x2 = *(unsigned long *)(A2 + i);
//      unsigned long x3 = *(unsigned long *)(B1 + i);
//      unsigned long x4 = *(unsigned long *)(B2 + i);
//    }
//                                                                              
//    to look like this:                                                        
//                                                                                  
//    A1_new = p + base1 // chain 1                                                 
//    B1_new = p + base2 // chain 2, now inside the loop, common offset is          
//                       // reused.                                                 
//                                                                                  
//    for (long long i = 0; i < n; i+=count) {                                      
//      unsigned long x1 = *(unsigned long *)(A1_new + i);                          
//      unsigned long x2 = *(unsigned long *)((A1_new + i) + offset);               
//      unsigned long x3 = *(unsigned long *)(B1_new + i);                          
//      unsigned long x4 = *(unsigned long *)((B1_new + i) + offset);               
//    }
Found some improvements for our internal benchmarks.
Can we avoid using the prime/apostrophe character (') in the examples?