diff --git a/llvm/test/CodeGen/X86/x86-flags-intrinsics-redzone.ll b/llvm/test/CodeGen/X86/x86-flags-intrinsics-redzone.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-flags-intrinsics-redzone.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
+; RUN: llc %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The intent of this test is to check that the pushfq/popq pair emitted for
+; the intrinsic llvm.x86.flags.read.u64 doesn't clobber -8(%rsp).
+
+define dso_local i32 @read_eflags(i64 noundef %z) local_unnamed_addr #0 {
+; CHECK-LABEL: read_eflags:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: subq $32, %rsp
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; CHECK-NEXT: imulq %rax, %rcx
+; CHECK-NEXT: movq %rcx, -8(%rbp)
+; CHECK-NEXT: movq %rcx, -16(%rbp)
+; CHECK-NEXT: movq %rcx, -24(%rbp)
+; CHECK-NEXT: movq %rcx, -32(%rbp)
+; CHECK-NEXT: pushfq
+; CHECK-NEXT: popq %r8
+; CHECK-NEXT: movsbq %al, %rdi
+; CHECK-NEXT: movsbl -1(%rbp), %eax
+; CHECK-NEXT: addl %r8d, %edi
+; CHECK-NEXT: addl %eax, %edi
+; CHECK-NEXT: leaq -2(%rbp), %rdx
+; CHECK-NEXT: movq $-30, %rsi
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_1: # %for.body6.for.body6_crit_edge
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movsbl -1(%rbp,%rsi), %eax
+; CHECK-NEXT: movsbl (%rdx), %ecx
+; CHECK-NEXT: addl %r8d, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: imull %edi, %eax
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: # %bb.2: # %for.body6.for.body6_crit_edge.1
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movsbl (%rbp,%rsi), %ecx
+; CHECK-NEXT: movsbl -1(%rdx), %edi
+; CHECK-NEXT: addl %r8d, %ecx
+; CHECK-NEXT: addl %edi, %ecx
+; CHECK-NEXT: imull %ecx, %eax
+; CHECK-NEXT: addq $-2, %rdx
+; CHECK-NEXT: addq $2, %rsi
+; CHECK-NEXT: movl %eax, %edi
+; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup5
+; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+entry:
+  %foo = alloca [32 x i8], align 8
+  %foo33 = getelementptr inbounds [32 x i8], [32 x i8]* %foo, i64 0, i64 0
+  %0 = getelementptr inbounds [32 x i8], [32 x i8]* %foo, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) #2
+  %conv = trunc i64 %z to i8
+  call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(32) %foo33, i8 %conv, i64 32, i1 false)
+  %1 = tail call i64 @llvm.x86.flags.read.u64()
+  %sext = shl i64 %z, 56
+  %conv937 = ashr exact i64 %sext, 56
+  %arrayidx1138 = getelementptr inbounds [32 x i8], [32 x i8]* %foo, i64 0, i64 31
+  %2 = load i8, i8* %arrayidx1138, align 1
+  %conv1239 = sext i8 %2 to i64
+  %add40 = add i64 %1, %conv937
+  %add1441 = add i64 %add40, %conv1239
+  %3 = trunc i64 %add1441 to i32
+  br label %for.body6.for.body6_crit_edge
+
+for.cond.cleanup5:                                ; preds = %for.body6.for.body6_crit_edge
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) #2
+  ret i32 %conv16
+
+for.body6.for.body6_crit_edge:                    ; preds = %for.body6.for.body6_crit_edge.1, %entry
+  %indvars.iv.next43 = phi i64 [ 1, %entry ], [ %indvars.iv.next.1, %for.body6.for.body6_crit_edge.1 ]
+  %conv1642 = phi i32 [ %3, %entry ], [ %conv16.1, %for.body6.for.body6_crit_edge.1 ]
+  %arrayidx8.phi.trans.insert = getelementptr inbounds [32 x i8], [32 x i8]* %foo, i64 0, i64 %indvars.iv.next43
+  %.pre = load i8, i8* %arrayidx8.phi.trans.insert, align 1
+  %conv9 = sext i8 %.pre to i64
+  %4 = sub nuw nsw i64 31, %indvars.iv.next43
+  %arrayidx11 = getelementptr inbounds [32 x i8], [32 x i8]* %foo, i64 0, i64 %4
+  %5 = load i8, i8* %arrayidx11, align 1
+  %conv12 = sext i8 %5 to i64
+  %add = add i64 %1, %conv9
+  %add14 = add i64 %add, %conv12
+  %6 = trunc i64 %add14 to i32
+  %conv16 = mul i32 %conv1642, %6
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv.next43, 1
+  %cmp3.not = icmp eq i64 %indvars.iv.next, 32
+  br i1 %cmp3.not, label %for.cond.cleanup5, label %for.body6.for.body6_crit_edge.1
+
+for.body6.for.body6_crit_edge.1:                  ; preds = %for.body6.for.body6_crit_edge
+  %arrayidx8.phi.trans.insert.1 = getelementptr inbounds [32 x i8], [32 x i8]* %foo, i64 0, i64 %indvars.iv.next
+  %.pre.1 = load i8, i8* %arrayidx8.phi.trans.insert.1, align 1
+  %conv9.1 = sext i8 %.pre.1 to i64
+  %7 = sub nsw i64 30, %indvars.iv.next43
+  %arrayidx11.1 = getelementptr inbounds [32 x i8], [32 x i8]* %foo, i64 0, i64 %7
+  %8 = load i8, i8* %arrayidx11.1, align 1
+  %conv12.1 = sext i8 %8 to i64
+  %add.1 = add i64 %1, %conv9.1
+  %add14.1 = add i64 %add.1, %conv12.1
+  %9 = trunc i64 %add14.1 to i32
+  %conv16.1 = mul i32 %conv16, %9
+  %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next43, 2
+  br label %for.body6.for.body6_crit_edge
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+declare i64 @llvm.x86.flags.read.u64()
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+
+attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="generic" }
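
Note on the hazard under test (a sketch for context, not part of the patch): llvm.x86.flags.read.u64 is lowered to a pushfq/popq pair, and pushfq writes EFLAGS to the 8 bytes just below the incoming %rsp. In a leaf function on the SysV x86-64 ABI those bytes can be red-zone storage for live locals, so keeping a local at -8(%rsp) across the intrinsic would corrupt it. A minimal, hypothetical .ll fragment (function and value names invented) that exercises the same lowering:

declare i64 @llvm.x86.flags.read.u64()

; A leaf function whose only local would normally be a red-zone candidate;
; the store below must survive the pushfq/popq emitted for the intrinsic.
define i64 @leaf_flags(i8 %v) nounwind {
entry:
  %slot = alloca i8, align 1                      ; would-be red-zone slot
  store i8 %v, i8* %slot, align 1
  %flags = call i64 @llvm.x86.flags.read.u64()    ; lowers to pushfq; popq
  %b = load i8, i8* %slot, align 1
  %ext = zext i8 %b to i64
  %sum = add i64 %flags, %ext
  ret i64 %sum
}

Feeding such a fragment to llc (as the RUN line above does for the real test) makes the conflict visible: the compiler must either give up the red zone or, as in the CHECK lines above, set up a frame and place locals at %rbp-relative offsets above %rsp before issuing pushfq.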