diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -1132,3 +1132,308 @@
   %retval.0 = select i1 %tobool.not, double %mul, double %sub3
   ret double %retval.0
 }
+
+%"struct.std::atomic" = type { %"struct.std::__atomic_base" }
+%"struct.std::__atomic_base" = type { i32 }
+
+; MachineCombiner patterns may increase register pressure and thereby cause
+; spills, so test that MachineCombiner does not kick in when there are more
+; live registers in the MBB than are available on the target machine.
+define i32 @test_reg_pressure(ptr %a) {
+; CHECK-LABEL: test_reg_pressure:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: sd s0, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset s0, -8
+; CHECK-NEXT: .cfi_offset s1, -16
+; CHECK-NEXT: .cfi_offset s2, -24
+; CHECK-NEXT: .cfi_offset s3, -32
+; CHECK-NEXT: .cfi_offset s4, -40
+; CHECK-NEXT: .cfi_offset s5, -48
+; CHECK-NEXT: .cfi_offset s6, -56
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw a6, 0(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw a7, 4(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw t0, 8(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw t1, 12(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw t2, 16(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw t6, 20(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw t3, 24(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw t4, 28(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw s3, 32(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw s2, 36(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw t5, 40(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw s4, 44(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw s5, 48(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a4, a6, a7
+; CHECK-NEXT: lw s6, 52(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or s0, t0, t1
+; CHECK-NEXT: lw a7, 56(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a1, t2, t6
+; CHECK-NEXT: lw a6, 60(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a4, a4, s0
+; CHECK-NEXT: lw s0, 64(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a1, a1, t3
+; CHECK-NEXT: lw a3, 68(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a2, t4, s3
+; CHECK-NEXT: lw t2, 72(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a1, a1, a4
+; CHECK-NEXT: lw a4, 76(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a2, a2, s2
+; CHECK-NEXT: lw a4, 80(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a5, s4, s5
+; CHECK-NEXT: lw t0, 84(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a2, a2, t5
+; CHECK-NEXT: lw t1, 88(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a5, a5, s6
+; CHECK-NEXT: lw s1, 92(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: lw a2, 96(a0)
+; CHECK-NEXT: or a5, a5, a7
+; CHECK-NEXT: or a3, a3, s0
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a2, a2, s1
+; CHECK-NEXT: lw s1, 100(a0)
+; CHECK-NEXT: or a5, a5, a6
+; CHECK-NEXT: or a3, a3, t2
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a2, a2, s1
+; CHECK-NEXT: lw s1, 104(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a3, a3, a4
+; CHECK-NEXT: lw a4, 108(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a2, a2, s1
+; CHECK-NEXT: lw s1, 112(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a3, a3, t0
+; CHECK-NEXT: lw s0, 116(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a2, a2, a4
+; CHECK-NEXT: lw a4, 120(a0)
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a1, a1, a5
+; CHECK-NEXT: lw a5, 124(a0)
+; CHECK-NEXT: or a3, a3, t1
+; CHECK-NEXT: or a2, a2, s1
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: or a4, a4, a5
+; CHECK-NEXT: lw a5, 124(a0)
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: or a2, a2, s0
+; CHECK-NEXT: or a4, a4, a5
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: fence rw, rw
+; CHECK-NEXT: lw a0, 124(a0)
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: or a0, a0, a4
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: fence r, rw
+; CHECK-NEXT: ld s0, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: ret
+entry:
+  %0 = load atomic i32, ptr %a seq_cst, align 4 ; first of 34 seq_cst atomic i32 loads (%0..%33)
+  %arrayidx1 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 1
+  %1 = load atomic i32, ptr %arrayidx1 seq_cst, align 4
+  %arrayidx3 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 2
+  %2 = load atomic i32, ptr %arrayidx3 seq_cst, align 4
+  %arrayidx5 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 3
+  %3 = load atomic i32, ptr %arrayidx5 seq_cst, align 4
+  %arrayidx7 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 4
+  %4 = load atomic i32, ptr %arrayidx7 seq_cst, align 4
+  %arrayidx9 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 5
+  %5 = load atomic i32, ptr %arrayidx9 seq_cst, align 4
+  %arrayidx11 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 6
+  %6 = load atomic i32, ptr %arrayidx11 seq_cst, align 4
+  %arrayidx13 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 7
+  %7 = load atomic i32, ptr %arrayidx13 seq_cst, align 4
+  %arrayidx15 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 8
+  %8 = load atomic i32, ptr %arrayidx15 seq_cst, align 4
+  %arrayidx17 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 9
+  %9 = load atomic i32, ptr %arrayidx17 seq_cst, align 4
+  %arrayidx19 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 10
+  %10 = load atomic i32, ptr %arrayidx19 seq_cst, align 4
+  %arrayidx21 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 11
+  %11 = load atomic i32, ptr %arrayidx21 seq_cst, align 4
+  %arrayidx23 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 12
+  %12 = load atomic i32, ptr %arrayidx23 seq_cst, align 4
+  %arrayidx25 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 13
+  %13 = load atomic i32, ptr %arrayidx25 seq_cst, align 4
+  %arrayidx27 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 14
+  %14 = load atomic i32, ptr %arrayidx27 seq_cst, align 4
+  %arrayidx29 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 15
+  %15 = load atomic i32, ptr %arrayidx29 seq_cst, align 4
+  %arrayidx31 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 16
+  %16 = load atomic i32, ptr %arrayidx31 seq_cst, align 4
+  %arrayidx33 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 17
+  %17 = load atomic i32, ptr %arrayidx33 seq_cst, align 4
+  %arrayidx35 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 18
+  %18 = load atomic i32, ptr %arrayidx35 seq_cst, align 4
+  %arrayidx37 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 19
+  %19 = load atomic i32, ptr %arrayidx37 seq_cst, align 4
+  %arrayidx39 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 20
+  %20 = load atomic i32, ptr %arrayidx39 seq_cst, align 4
+  %arrayidx41 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 21
+  %21 = load atomic i32, ptr %arrayidx41 seq_cst, align 4
+  %arrayidx43 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 22
+  %22 = load atomic i32, ptr %arrayidx43 seq_cst, align 4
+  %arrayidx45 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 23
+  %23 = load atomic i32, ptr %arrayidx45 seq_cst, align 4
+  %arrayidx47 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 24
+  %24 = load atomic i32, ptr %arrayidx47 seq_cst, align 4
+  %arrayidx49 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 25
+  %25 = load atomic i32, ptr %arrayidx49 seq_cst, align 4
+  %arrayidx51 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 26
+  %26 = load atomic i32, ptr %arrayidx51 seq_cst, align 4
+  %arrayidx53 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 27
+  %27 = load atomic i32, ptr %arrayidx53 seq_cst, align 4
+  %arrayidx55 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 28
+  %28 = load atomic i32, ptr %arrayidx55 seq_cst, align 4
+  %arrayidx57 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 29
+  %29 = load atomic i32, ptr %arrayidx57 seq_cst, align 4
+  %arrayidx59 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 30
+  %30 = load atomic i32, ptr %arrayidx59 seq_cst, align 4
+  %arrayidx61 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 31
+  %31 = load atomic i32, ptr %arrayidx61 seq_cst, align 4
+  %arrayidx63 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 32 ; NOTE(review): %arrayidx63 is never used
+  %32 = load atomic i32, ptr %arrayidx61 seq_cst, align 4 ; loads via %arrayidx61 (index 31) again, not %arrayidx63
+  %arrayidx65 = getelementptr inbounds %"struct.std::atomic", ptr %a, i64 33 ; NOTE(review): %arrayidx65 is never used
+  %33 = load atomic i32, ptr %arrayidx61 seq_cst, align 4 ; loads via %arrayidx61 (index 31) again; the assertions above expect three lw from 124(a0)
+  %or0 = or i32 %0, %0 ; %or0..%or33: each loaded value is OR'd with itself
+  %or1 = or i32 %1, %1
+  %or2 = or i32 %2, %2
+  %or3 = or i32 %3, %3
+  %or4 = or i32 %4, %4
+  %or5 = or i32 %5, %5
+  %or6 = or i32 %6, %6
+  %or7 = or i32 %7, %7
+  %or8 = or i32 %8, %8
+  %or9 = or i32 %9, %9
+  %or10 = or i32 %10, %10
+  %or11 = or i32 %11, %11
+  %or12 = or i32 %12, %12
+  %or13 = or i32 %13, %13
+  %or14 = or i32 %14, %14
+  %or15 = or i32 %15, %15
+  %or16 = or i32 %16, %16
+  %or17 = or i32 %17, %17
+  %or18 = or i32 %18, %18
+  %or19 = or i32 %19, %19 ; NOTE(review): %or19 is never used by the chain below
+  %or20 = or i32 %20, %20
+  %or21 = or i32 %21, %21
+  %or22 = or i32 %22, %22
+  %or23 = or i32 %23, %23
+  %or24 = or i32 %24, %24
+  %or25 = or i32 %25, %25
+  %or26 = or i32 %26, %26
+  %or27 = or i32 %27, %27
+  %or28 = or i32 %28, %28
+  %or29 = or i32 %29, %29
+  %or30 = or i32 %30, %30
+  %or31 = or i32 %31, %31
+  %or32 = or i32 %32, %32
+  %or33 = or i32 %33, %33
+  %or34 = or i32 %or0, %or1 ; %or34..%or65: linear chain OR-ing the %orN values together
+  %or35 = or i32 %or34, %or2
+  %or36 = or i32 %or35, %or3
+  %or37 = or i32 %or36, %or4
+  %or38 = or i32 %or37, %or5
+  %or39 = or i32 %or38, %or6
+  %or40 = or i32 %or39, %or7
+  %or41 = or i32 %or40, %or8
+  %or42 = or i32 %or41, %or9
+  %or43 = or i32 %or42, %or10
+  %or44 = or i32 %or43, %or11
+  %or45 = or i32 %or44, %or12
+  %or46 = or i32 %or45, %or13
+  %or47 = or i32 %or46, %or14
+  %or48 = or i32 %or47, %or15
+  %or49 = or i32 %or48, %or16
+  %or50 = or i32 %or49, %or17
+  %or51 = or i32 %or50, %or18
+  %or52 = or i32 %or51, %or20 ; NOTE(review): goes from %or18 straight to %or20, skipping %or19
+  %or53 = or i32 %or52, %or21
+  %or54 = or i32 %or53, %or22
+  %or55 = or i32 %or54, %or23
+  %or56 = or i32 %or55, %or24
+  %or57 = or i32 %or56, %or25
+  %or58 = or i32 %or57, %or26
+  %or59 = or i32 %or58, %or27
+  %or60 = or i32 %or59, %or28
+  %or61 = or i32 %or60, %or29
+  %or62 = or i32 %or61, %or30
+  %or63 = or i32 %or62, %or31
+  %or64 = or i32 %or63, %or32
+  %or65 = or i32 %or64, %or33
+  ret i32 %or65 ; returns the final value of the OR chain
+}