diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -1282,7 +1282,13 @@ if (!MO.isReg()) continue; - for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg())) + MCRegister Reg = MO.getReg(); + + // This picks up sibling registers (e.g. %al -> %ah). + for (MCRegUnitIterator Unit(Reg, &TRI); Unit.isValid(); ++Unit) + RegsToZero.reset(*Unit); + + for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg)) RegsToZero.reset(SReg); } } diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -opaque-pointers | FileCheck %s --check-prefix=I386 +; +; Make sure we don't zero out %eax when both %ah and %al are used. 
+ +; ClangBuiltLinux issue 1766: https://github.com/ClangBuiltLinux/linux/issues/1766 + +%struct.maple_subtree_state = type { ptr } + +@mas_data_end_type = dso_local local_unnamed_addr global i32 0, align 4 +@ma_meta_end_mn_0_0_0_0_0_0 = dso_local local_unnamed_addr global i8 0, align 1 +@mt_pivots_0 = dso_local local_unnamed_addr global i8 0, align 1 +@mas_data_end___trans_tmp_2 = dso_local local_unnamed_addr global ptr null, align 4 +@mt_slots_0 = dso_local local_unnamed_addr global i8 0, align 1 + +define dso_local zeroext i1 @test1(ptr nocapture noundef readonly %0) local_unnamed_addr "zero-call-used-regs"="used-gpr" nounwind { +; I386-LABEL: test1: +; I386: # %bb.0: +; I386-NEXT: pushl %ebx +; I386-NEXT: subl $24, %esp +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: movl (%eax), %eax +; I386-NEXT: movzbl (%eax), %ebx +; I386-NEXT: calll bar +; I386-NEXT: testb %al, %al +; I386-NEXT: # implicit-def: $al +; I386-NEXT: # kill: killed $al +; I386-NEXT: je .LBB0_6 +; I386-NEXT: # %bb.1: +; I386-NEXT: cmpl $0, mas_data_end_type +; I386-NEXT: je .LBB0_3 +; I386-NEXT: # %bb.2: +; I386-NEXT: movzbl ma_meta_end_mn_0_0_0_0_0_0, %eax +; I386-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; I386-NEXT: jmp .LBB0_6 +; I386-NEXT: .LBB0_3: +; I386-NEXT: movb mt_pivots_0, %ah +; I386-NEXT: movb %ah, %al +; I386-NEXT: decb %al +; I386-NEXT: movl mas_data_end___trans_tmp_2, %ecx +; I386-NEXT: movsbl %al, %edx +; I386-NEXT: cmpl $0, (%ecx,%edx,4) +; I386-NEXT: je .LBB0_5 +; I386-NEXT: # %bb.4: +; I386-NEXT: movb %al, %ah +; I386-NEXT: .LBB0_5: +; I386-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; I386-NEXT: .LBB0_6: +; I386-NEXT: movb mt_slots_0, %bh +; I386-NEXT: leal {{[0-9]+}}(%esp), %eax +; I386-NEXT: movl %eax, (%esp) +; I386-NEXT: calll baz +; I386-NEXT: subl $4, %esp +; I386-NEXT: cmpb %bh, %bl +; I386-NEXT: jae .LBB0_8 +; I386-NEXT: # %bb.7: +; I386-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; I386-NEXT: movl %eax, (%esp) +; I386-NEXT: 
calll gaz +; I386-NEXT: .LBB0_8: +; I386-NEXT: movb $1, %al +; I386-NEXT: addl $24, %esp +; I386-NEXT: popl %ebx +; I386-NEXT: xorl %ecx, %ecx +; I386-NEXT: xorl %edx, %edx +; I386-NEXT: retl + %2 = alloca %struct.maple_subtree_state, align 4 + %3 = load ptr, ptr %0, align 4 + %4 = load i8, ptr %3, align 1 + %5 = tail call zeroext i1 @bar() + br i1 %5, label %6, label %20 + +6: ; preds = %1 + %7 = load i32, ptr @mas_data_end_type, align 4 + %8 = icmp eq i32 %7, 0 + br i1 %8, label %11, label %9 + +9: ; preds = %6 + %10 = load i8, ptr @ma_meta_end_mn_0_0_0_0_0_0, align 1 + br label %20 + +11: ; preds = %6 + %12 = load i8, ptr @mt_pivots_0, align 1 + %13 = add i8 %12, -1 + %14 = load ptr, ptr @mas_data_end___trans_tmp_2, align 4 + %15 = sext i8 %13 to i32 + %16 = getelementptr inbounds [1 x i32], ptr %14, i32 0, i32 %15 + %17 = load i32, ptr %16, align 4 + %18 = icmp eq i32 %17, 0 + %19 = select i1 %18, i8 %12, i8 %13 + br label %20 + +20: ; preds = %11, %9, %1 + %21 = phi i8 [ undef, %1 ], [ %10, %9 ], [ %19, %11 ] + %22 = load i8, ptr @mt_slots_0, align 1 + call void @baz(ptr nonnull sret(%struct.maple_subtree_state) align 4 %2) + %23 = icmp ult i8 %4, %22 + br i1 %23, label %24, label %25 + +24: ; preds = %20 + call void @gaz(i8 noundef signext %21) + br label %25 + +25: ; preds = %20, %24 + ret i1 true +} + +declare dso_local zeroext i1 @bar(...) local_unnamed_addr + +declare dso_local void @baz(ptr sret(%struct.maple_subtree_state) align 4, ...) local_unnamed_addr + +declare dso_local void @gaz(i8 noundef signext) local_unnamed_addr