Index: llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -706,12 +706,15 @@ .addImm(2) .cloneMemRefs(MI) .setMIFlags(MI.getFlags()); - BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri)) + BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri)) .addDef(SizeReg) .addReg(SizeReg) .addImm(16 * 2) .addImm(0); - BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB); + BuildMI(LoopBB, DL, TII->get(AArch64::Bcc)) + .addImm(AArch64CC::NE) + .addMBB(LoopBB) + .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill); LoopBB->addSuccessor(LoopBB); LoopBB->addSuccessor(DoneBB); Index: llvm/test/CodeGen/AArch64/settag-merge-order.ll =================================================================== --- llvm/test/CodeGen/AArch64/settag-merge-order.ll +++ llvm/test/CodeGen/AArch64/settag-merge-order.ll @@ -11,8 +11,8 @@ ; CHECK-LABEL: stg128_128_gap_128_128: ; CHECK: mov x8, #512 ; CHECK: st2g sp, [sp], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, +; CHECK: subs x8, x8, #32 +; CHECK: b.ne ; CHECK: ret %a = alloca i8, i32 128, align 16 %a2 = alloca i8, i32 128, align 16 @@ -40,18 +40,18 @@ if.then: ; CHECK: mov x8, #320 -; CHECK: sub x8, x8, #32 +; CHECK: subs x8, x8, #32 ; CHECK: st2g x9, [x9], #32 -; CHECK: cbnz x8, +; CHECK: b.ne call void @llvm.aarch64.settag(ptr %a, i64 160) call void @llvm.aarch64.settag(ptr %a2, i64 160) br label %if.end if.else: ; CHECK: mov x8, #256 -; CHECK: sub x8, x8, #32 +; CHECK: subs x8, x8, #32 ; CHECK: st2g x9, [x9], #32 -; CHECK: cbnz x8, +; CHECK: b.ne call void @llvm.aarch64.settag(ptr %c, i64 128) call void @llvm.aarch64.settag(ptr %c2, i64 128) br label %if.end @@ -59,8 +59,8 @@ if.end: ; CHECK: mov x8, #576 ; CHECK: st2g sp, [sp], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, +; CHECK: subs x8, x8, #32 +; CHECK: b.ne call void 
@llvm.aarch64.settag(ptr %a, i64 160) call void @llvm.aarch64.settag(ptr %a2, i64 160) call void @llvm.aarch64.settag(ptr %c, i64 128) Index: llvm/test/CodeGen/AArch64/settag-merge.ll =================================================================== --- llvm/test/CodeGen/AArch64/settag-merge.ll +++ llvm/test/CodeGen/AArch64/settag-merge.ll @@ -56,8 +56,8 @@ ; CHECK-LABEL: stg128_128_128_128: ; CHECK: mov x8, #512 ; CHECK: st2g sp, [sp], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, +; CHECK: subs x8, x8, #32 +; CHECK: b.ne ; CHECK: ret %a = alloca i8, i32 128, align 16 %b = alloca i8, i32 128, align 16 @@ -75,8 +75,8 @@ ; CHECK-LABEL: stg16_512_16: ; CHECK: mov x8, #544 ; CHECK: st2g sp, [sp], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, +; CHECK: subs x8, x8, #32 +; CHECK: b.ne ; CHECK: ret %a = alloca i8, i32 16, align 16 %b = alloca i8, i32 512, align 16 @@ -92,8 +92,8 @@ ; CHECK-LABEL: stg512_512_512: ; CHECK: mov x8, #1536 ; CHECK: st2g sp, [sp], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, +; CHECK: subs x8, x8, #32 +; CHECK: b.ne ; CHECK: ret %a = alloca i8, i32 512, align 16 %b = alloca i8, i32 512, align 16 @@ -136,9 +136,9 @@ ; CHECK: tbz w0, #0, [[LABEL:.LBB.*]] ; CHECK: add x9, sp, # ; CHECK: mov x8, #256 -; CHECK: sub x8, x8, #32 +; CHECK: subs x8, x8, #32 ; CHECK: st2g x9, [x9], #32 -; CHECK: cbnz x8, +; CHECK: b.ne ; CHECK: [[LABEL]]: ; CHECK: stg sp, [sp, # ; CHECK: st2g sp, [sp], # @@ -164,9 +164,9 @@ ; CHECK: tbz w0, #0, [[LABEL:.LBB.*]] ; CHECK: add x9, sp, # ; CHECK: mov x8, #1024 -; CHECK: sub x8, x8, #32 +; CHECK: subs x8, x8, #32 ; CHECK: st2g x9, [x9], #32 -; CHECK: cbnz x8, +; CHECK: b.ne ; CHECK: [[LABEL]]: ; CHECK: stg sp, [sp, # ; CHECK: st2g sp, [sp], # @@ -192,13 +192,13 @@ ; CHECK-LABEL: stg128_128_gap_128_128: ; CHECK: mov x9, sp ; CHECK: mov x8, #256 -; CHECK: sub x8, x8, #32 +; CHECK: subs x8, x8, #32 ; CHECK: st2g x9, [x9], #32 -; CHECK: cbnz x8, +; CHECK: b.ne ; CHECK: mov x8, #256 ; CHECK: st2g sp, [sp], #32 -; 
CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, +; CHECK: subs x8, x8, #32 +; CHECK: b.ne ; CHECK: ret %a = alloca i8, i32 128, align 16 %a2 = alloca i8, i32 128, align 16 Index: llvm/test/CodeGen/AArch64/settag.ll =================================================================== --- llvm/test/CodeGen/AArch64/settag.ll +++ llvm/test/CodeGen/AArch64/settag.ll @@ -61,9 +61,9 @@ ; CHECK-NEXT: mov x8, #256 ; CHECK-NEXT: .LBB5_1: // %entry ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub x8, x8, #32 +; CHECK-NEXT: subs x8, x8, #32 ; CHECK-NEXT: st2g x0, [x0], #32 -; CHECK-NEXT: cbnz x8, .LBB5_1 +; CHECK-NEXT: b.ne .LBB5_1 ; CHECK-NEXT: // %bb.2: // %entry ; CHECK-NEXT: ret entry: @@ -78,9 +78,9 @@ ; CHECK-NEXT: stg x0, [x0], #16 ; CHECK-NEXT: .LBB6_1: // %entry ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub x8, x8, #32 +; CHECK-NEXT: subs x8, x8, #32 ; CHECK-NEXT: st2g x0, [x0], #32 -; CHECK-NEXT: cbnz x8, .LBB6_1 +; CHECK-NEXT: b.ne .LBB6_1 ; CHECK-NEXT: // %bb.2: // %entry ; CHECK-NEXT: ret entry: @@ -106,9 +106,9 @@ ; CHECK-NEXT: stzg x0, [x0], #16 ; CHECK-NEXT: .LBB8_1: // %entry ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub x8, x8, #32 +; CHECK-NEXT: subs x8, x8, #32 ; CHECK-NEXT: stz2g x0, [x0], #32 -; CHECK-NEXT: cbnz x8, .LBB8_1 +; CHECK-NEXT: b.ne .LBB8_1 ; CHECK-NEXT: // %bb.2: // %entry ; CHECK-NEXT: ret entry: @@ -155,8 +155,8 @@ ; CHECK-NEXT: .LBB11_1: // %entry ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: st2g sp, [sp], #32 -; CHECK-NEXT: sub x8, x8, #32 -; CHECK-NEXT: cbnz x8, .LBB11_1 +; CHECK-NEXT: subs x8, x8, #32 +; CHECK-NEXT: b.ne .LBB11_1 ; CHECK-NEXT: // %bb.2: // %entry ; CHECK-NEXT: stg sp, [sp], #16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -179,9 +179,9 @@ ; CHECK-NEXT: stg x9, [x9], #16 ; CHECK-NEXT: .LBB12_1: // %entry ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub x8, x8, #32 +; CHECK-NEXT: subs x8, x8, #32 ; 
CHECK-NEXT: st2g x9, [x9], #32 -; CHECK-NEXT: cbnz x8, .LBB12_1 +; CHECK-NEXT: b.ne .LBB12_1 ; CHECK-NEXT: // %bb.2: // %entry ; CHECK-NEXT: add sp, sp, #272 ; CHECK-NEXT: .cfi_def_cfa_offset 16 @@ -195,5 +195,20 @@ ret void } +; Verify that SLH works together with MTE stack tagging, +; see issue https://github.com/llvm/llvm-project/issues/61830 +define void @test_slh() speculative_load_hardening { +; CHECK-LABEL: test_slh +; Verify that the memtag loop uses a b.cc conditional branch +; rather than a cb[n]z branch. +; CHECK-NOT: cb{{n?}}z +; CHECK: b. + %d = alloca [48 x i32], align 4 + call void @b(ptr %d) + ret void +} +declare void @b(ptr) + + declare void @llvm.aarch64.settag(ptr %p, i64 %a) declare void @llvm.aarch64.settag.zero(ptr %p, i64 %a)