diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -913,6 +913,11 @@ assert(To && From && "Invalid allocation object"); Allocas[From] = To; + // If From is before To, it's possible that there is a use of From between + // them. + if (From->comesBefore(To)) + const_cast(To)->moveBefore(const_cast(From)); + // AA might be used later for instruction scheduling, and we need it to be // able to deduce the correct aliasing releationships between pointers // derived from the alloca being remapped and the target of that remapping. diff --git a/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir b/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir @@ -0,0 +1,212 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=stack-coloring %s -o - | FileCheck %s + +## %tmpcast is between 2 allocas. %ref.tmp will be replaced by %tmp. Make sure +## that we reorder %tmp to be before the use in %tmpcast. 
+ +# CHECK: %a = alloca %struct.e, align 1 +# CHECK: %tmp = alloca %"struct.e::f", align 8 +# CHECK: %0 = bitcast %"struct.e::f"* %tmp to { <2 x float>, <2 x float> }* +# CHECK: %ref.tmp = alloca { <2 x float>, <2 x float> }, align 8 +# CHECK: %tmpcast = bitcast { <2 x float>, <2 x float> }* %0 to %class.d* +# CHECK: %b = alloca %struct.j, align 4 + +--- | + source_filename = "" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-apple-macosx10.15.0" + + %class.d = type { [4 x float] } + %struct.e = type { i8 } + %struct.j = type { i32 } + %"struct.e::f" = type { [4 x i32*] } + + @i = local_unnamed_addr global %class.d zeroinitializer, align 8 + + ; Function Attrs: ssp uwtable + define void @_Z1gv() local_unnamed_addr #0 { + entry: + %a = alloca %struct.e, align 1 + %ref.tmp = alloca { <2 x float>, <2 x float> }, align 8 + %tmpcast = bitcast { <2 x float>, <2 x float> }* %ref.tmp to %class.d* + %b = alloca %struct.j, align 4 + %tmp = alloca %"struct.e::f", align 8 + %0 = bitcast %struct.e* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0) #3 + %1 = bitcast { <2 x float>, <2 x float> }* %ref.tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %1) #3 + %agg.tmp.sroa.0.0.copyload = load <2 x float>, <2 x float>* bitcast (%class.d* @i to <2 x float>*), align 8, !tbaa.struct !3 + %agg.tmp.sroa.2.0.copyload = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds (%class.d, %class.d* @i, i64 0, i32 0, i64 2) to <2 x float>*), align 8, !tbaa.struct !3 + %call = tail call { <2 x float>, <2 x float> } @_Zng1d(<2 x float> %agg.tmp.sroa.0.0.copyload, <2 x float> %agg.tmp.sroa.2.0.copyload) + %2 = bitcast { <2 x float>, <2 x float> }* %ref.tmp to <2 x float>* + %3 = extractvalue { <2 x float>, <2 x float> } %call, 0 + store <2 x float> %3, <2 x float>* %2, align 8 + %4 = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* 
%ref.tmp, i64 0, i32 1 + %5 = extractvalue { <2 x float>, <2 x float> } %call, 1 + store <2 x float> %5, <2 x float>* %4, align 8 + call void @_ZN1e1hERK1d(%struct.e* nonnull %a, %class.d* nonnull dereferenceable(16) %tmpcast) + call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %1) #3 + %6 = bitcast %struct.j* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %6) #3 + %7 = bitcast %"struct.e::f"* %tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %7) #3 + call void @_ZN1j1kEv(%"struct.e::f"* nonnull sret align 8 %tmp, %struct.j* nonnull %b) + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %7) #3 + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %6) #3 + call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %0) #3 + ret void + } + + ; Function Attrs: argmemonly nounwind willreturn + declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + + declare void @_ZN1e1hERK1d(%struct.e*, %class.d* dereferenceable(16)) local_unnamed_addr #2 + + declare { <2 x float>, <2 x float> } @_Zng1d(<2 x float>, <2 x float>) local_unnamed_addr #2 + + ; Function Attrs: argmemonly nounwind willreturn + declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + + declare void @_ZN1j1kEv(%"struct.e::f"* sret align 8, %struct.j*) local_unnamed_addr #2 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #3 + + attributes #0 = { ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="64" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { argmemonly nounwind willreturn } + attributes #2 = { 
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #3 = { nounwind } + + !llvm.module.flags = !{!0, !1} + !llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 7, !"PIC Level", i32 2} + !2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 82093e8fb7d65486ff450d33bf386aabd0d194f7)"} + !3 = !{i64 0, i64 16, !4} + !4 = !{!5, !5, i64 0} + !5 = !{!"omnipotent char", !6, i64 0} + !6 = !{!"Simple C++ TBAA"} + +... +--- +name: _Z1gv +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: vr128, preferred-register: '' } + - { id: 1, class: vr128, preferred-register: '' } + - { id: 2, class: vr128, preferred-register: '' } + - { id: 3, class: vr128, preferred-register: '' } + - { id: 4, class: gr64, preferred-register: '' } + - { id: 5, class: gr64, preferred-register: '' } + - { id: 6, class: gr64, preferred-register: '' } + - { id: 7, class: gr64, preferred-register: '' } +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: false + hasCalls: true + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: a, type: default, offset: 0, size: 1, alignment: 8, + 
stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: ref.tmp, type: default, offset: 0, size: 16, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: b, type: default, offset: 0, size: 4, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: tmp, type: default, offset: 0, size: 32, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + ; CHECK-LABEL: name: _Z1gv + ; CHECK: [[MOVSDrm:%[0-9]+]]:vr128 = MOVSDrm $rip, 1, $noreg, @i, $noreg :: (dereferenceable load 8 from `<2 x float>* bitcast (%class.d* @i to <2 x float>*)`) + ; CHECK: [[MOVSDrm1:%[0-9]+]]:vr128 = MOVSDrm $rip, 1, $noreg, @i + 8, $noreg :: (dereferenceable load 8 from `<2 x float>* bitcast (float* getelementptr inbounds (%class.d, %class.d* @i, i64 0, i32 0, i64 2) to <2 x float>*)`) + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK: $xmm0 = COPY [[MOVSDrm]] + ; CHECK: $xmm1 = COPY [[MOVSDrm1]] + ; CHECK: CALL64pcrel32 @_Zng1d, csr_64, implicit $rsp, implicit $ssp, implicit $xmm0, implicit $xmm1, implicit-def $rsp, implicit-def $ssp, implicit-def $xmm0, implicit-def $xmm1 + ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0 + ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY $xmm1 + ; CHECK: MOVLPDmr 
%stack.3.tmp, 1, $noreg, 0, $noreg, [[COPY]] :: (store 8 into %ir.3) + ; CHECK: MOVLPDmr %stack.3.tmp, 1, $noreg, 8, $noreg, [[COPY1]] :: (store 8 into %ir.5) + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0.a, 1, $noreg, 0, $noreg + ; CHECK: [[LEA64r1:%[0-9]+]]:gr64 = LEA64r %stack.3.tmp, 1, $noreg, 0, $noreg + ; CHECK: $rdi = COPY [[LEA64r]] + ; CHECK: $rsi = COPY [[LEA64r1]] + ; CHECK: CALL64pcrel32 @_ZN1e1hERK1d, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp + ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK: [[LEA64r2:%[0-9]+]]:gr64 = LEA64r %stack.3.tmp, 1, $noreg, 0, $noreg + ; CHECK: [[LEA64r3:%[0-9]+]]:gr64 = LEA64r %stack.2.b, 1, $noreg, 0, $noreg + ; CHECK: $rdi = COPY [[LEA64r2]] + ; CHECK: $rsi = COPY [[LEA64r3]] + ; CHECK: CALL64pcrel32 @_ZN1j1kEv, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp + ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK: RET 0 + LIFETIME_START %stack.0.a + LIFETIME_START %stack.1.ref.tmp + %0:vr128 = MOVSDrm $rip, 1, $noreg, @i, $noreg :: (dereferenceable load 8 from `<2 x float>* bitcast (%class.d* @i to <2 x float>*)`) + %1:vr128 = MOVSDrm $rip, 1, $noreg, @i + 8, $noreg :: (dereferenceable load 8 from `<2 x float>* bitcast (float* getelementptr inbounds (%class.d, %class.d* @i, i64 0, i32 0, i64 2) to <2 x float>*)`) + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, 
implicit $ssp + $xmm0 = COPY %0 + $xmm1 = COPY %1 + CALL64pcrel32 @_Zng1d, csr_64, implicit $rsp, implicit $ssp, implicit $xmm0, implicit $xmm1, implicit-def $rsp, implicit-def $ssp, implicit-def $xmm0, implicit-def $xmm1 + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %2:vr128 = COPY $xmm0 + %3:vr128 = COPY $xmm1 + MOVLPDmr %stack.1.ref.tmp, 1, $noreg, 0, $noreg, %2 :: (store 8 into %ir.2) + MOVLPDmr %stack.1.ref.tmp, 1, $noreg, 8, $noreg, %3 :: (store 8 into %ir.4) + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %4:gr64 = LEA64r %stack.0.a, 1, $noreg, 0, $noreg + %5:gr64 = LEA64r %stack.1.ref.tmp, 1, $noreg, 0, $noreg + $rdi = COPY %4 + $rsi = COPY %5 + CALL64pcrel32 @_ZN1e1hERK1d, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + LIFETIME_END %stack.1.ref.tmp + LIFETIME_START %stack.2.b + LIFETIME_START %stack.3.tmp + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %6:gr64 = LEA64r %stack.3.tmp, 1, $noreg, 0, $noreg + %7:gr64 = LEA64r %stack.2.b, 1, $noreg, 0, $noreg + $rdi = COPY %6 + $rsi = COPY %7 + CALL64pcrel32 @_ZN1j1kEv, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + LIFETIME_END %stack.3.tmp + LIFETIME_END %stack.2.b + LIFETIME_END %stack.0.a + RET 0 + +...