When the inline spiller rematerializes an instruction (e.g. a load from a constant pool), it should take the debug location from the instruction that immediately follows the rematerialization point.
Currently the rematerialized copy keeps the debug location of the original instruction, which may not be the right one at the new insertion point. Consider:
typedef float __m128 __attribute__((__vector_size__(16)));
extern __m128 doSomething(__m128, __m128);

__m128 foo(__m128 X) {                        // line 6
  const __m128 V = {0.5f, 0.5f, 0.5f, 0.5f};  // line 7
  __m128 Sub = X - V;                         // line 8
  __m128 Add = X + V;                         // line 9
  __m128 Result = doSomething(Add, Sub);      // line 11
  return V - Result;                          // line 13
}
generates:
    pushq   %rbp
    movq    %rsp, %rbp
    .loc    1 8 0 prologue_end          # test.cpp:8:0
    vmovaps .LCPI0_0(%rip), %xmm2       # xmm2 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01]
    vsubps  %xmm0, %xmm2, %xmm1
    .loc    1 9 0                       # test.cpp:9:0
    vaddps  %xmm2, %xmm0, %xmm0
    .loc    1 11 0 discriminator 1      # test.cpp:11:0
    callq   _Z11doSomethingDv4_fS_
    .loc    1 8 0                       # test.cpp:8:0 <==== back to line 8 from line 11.
    vmovaps .LCPI0_0(%rip), %xmm1       # xmm1 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01]
    .loc    1 13 0                      # test.cpp:13:0
    vsubps  %xmm0, %xmm1, %xmm0
    popq    %rbp
    retq