diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -383,7 +383,9 @@
 
       // i1 is a special case because SDAG i1 true is naturally zero extended
       // when widened using ANYEXT. We need to do it explicitly here.
-      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
+      auto &Flags = CurArgInfo.Flags[0];
+      if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
+          !Flags.isZExt()) {
         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
       } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
                  1) {
@@ -569,7 +571,8 @@
              MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
              "Unexpected registers used for i1 arg");
 
-      if (!OrigArg.Flags[0].isZExt()) {
+      auto &Flags = OrigArg.Flags[0];
+      if (!Flags.isZExt() && !Flags.isSExt()) {
         // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
         Register OrigReg = OrigArg.Regs[0];
         Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
@@ -1110,7 +1113,8 @@
   for (auto &OrigArg : Info.OrigArgs) {
     splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
-    if (OrigArg.Ty->isIntegerTy(1)) {
+    auto &Flags = OrigArg.Flags[0];
+    if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) {
       ArgInfo &OutArg = OutArgs.back();
       assert(OutArg.Regs.size() == 1 &&
              MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll
@@ -42,7 +42,7 @@
   ret i32 %x
 }
 
-; Zeroext param is passed on the stack. We should still get a G_ASSERT_SEXT.
+; Signext param is passed on the stack. We should still get a G_ASSERT_SEXT.
 define i32 @signext_param_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f,
   ; CHECK-LABEL: name: signext_param_stack
   ; CHECK: bb.1 (%ir-block.0):
@@ -61,10 +61,8 @@
   ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
   ; CHECK-NEXT:   [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8)
   ; CHECK-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[SEXTLOAD]], 1
-  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
-  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
-  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
-  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s1)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
   ; CHECK-NEXT:   $w0 = COPY [[ZEXT]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $w0
                                 i64 %g, i64 %h, i64 %i, i1 signext %j) {
@@ -124,3 +122,45 @@
                                i8 signext %j) {
   ret i8 %j
 }
+
+define i32 @callee_signext_i1(i1 signext %0) {
+  ; CHECK-LABEL: name: callee_signext_i1
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
+  ; CHECK-NEXT:   $w0 = COPY [[SEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  %r = sext i1 %0 to i32
+  ret i32 %r
+}
+
+define i32 @caller_signext_i1() {
+  ; CHECK-LABEL: name: caller_signext_i1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s8) = G_SEXT [[C]](s1)
+  ; CHECK-NEXT:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s8)
+  ; CHECK-NEXT:   $w0 = COPY [[SEXT1]](s32)
+  ; CHECK-NEXT:   BL @callee_signext_i1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  %r = call i32 @callee_signext_i1(i1 signext true)
+  ret i32 %r
+}
+
+define signext i1 @ret_signext_i1() {
+  ; CHECK-LABEL: name: ret_signext_i1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
+  ; CHECK-NEXT:   $w0 = COPY [[SEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ret i1 true
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll
@@ -116,3 +116,45 @@
                                i8 zeroext %j) {
   ret i8 %j
 }
+
+define i32 @callee_zeroext_i1(i1 zeroext %0) {
+  ; CHECK-LABEL: name: callee_zeroext_i1
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
+  ; CHECK-NEXT:   $w0 = COPY [[ZEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  %r = zext i1 %0 to i32
+  ret i32 %r
+}
+
+define i32 @caller_zeroext_i1() {
+  ; CHECK-LABEL: name: caller_zeroext_i1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[C]](s1)
+  ; CHECK-NEXT:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s8)
+  ; CHECK-NEXT:   $w0 = COPY [[ZEXT1]](s32)
+  ; CHECK-NEXT:   BL @callee_zeroext_i1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  %r = call i32 @callee_zeroext_i1(i1 zeroext true)
+  ret i32 %r
+}
+
+define zeroext i1 @ret_zeroext_i1() {
+  ; CHECK-LABEL: name: ret_zeroext_i1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s1)
+  ; CHECK-NEXT:   $w0 = COPY [[ZEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ret i1 true
+}
diff --git a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
--- a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=GISEL
 
 define <4 x i32> @sextbool_add_vector(<4 x i32> %c1, <4 x i32> %c2, <4 x i32> %x) {
 ; CHECK-LABEL: sextbool_add_vector:
@@ -7,6 +8,14 @@
 ; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: sextbool_add_vector:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    cmeq v1.4s, v0.4s, v1.4s
+; GISEL-NEXT:    mov v0.16b, v2.16b
+; GISEL-NEXT:    shl v1.4s, v1.4s, #31
+; GISEL-NEXT:    ssra v0.4s, v1.4s, #31
+; GISEL-NEXT:    ret
   %c = icmp eq <4 x i32> %c1, %c2
   %b = sext <4 x i1> %c to <4 x i32>
   %s = add <4 x i32> %x, %b
@@ -19,6 +28,15 @@
 ; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: zextbool_sub_vector:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x8, .LCPI1_0
+; GISEL-NEXT:    cmeq v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_0]
+; GISEL-NEXT:    and v0.16b, v0.16b, v3.16b
+; GISEL-NEXT:    sub v0.4s, v2.4s, v0.4s
+; GISEL-NEXT:    ret
   %c = icmp eq <4 x i32> %c1, %c2
   %b = zext <4 x i1> %c to <4 x i32>
   %s = sub <4 x i32> %x, %b
@@ -30,6 +48,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add w0, w1, w0
 ; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: assertsext_sub_1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0x1
+; GISEL-NEXT:    sub w0, w1, w8
+; GISEL-NEXT:    ret
   %e = zext i1 %cond to i32
   %r = sub i32 %y, %e
   ret i32 %r
@@ -40,6 +64,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w0, w1, w0
 ; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: assertsext_add_1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0x1
+; GISEL-NEXT:    add w0, w8, w1
+; GISEL-NEXT:    ret
   %e = zext i1 %cond to i32
   %r = add i32 %e, %y
   ret i32 %r
@@ -50,8 +80,50 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w0, w1, w0
 ; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: assertsext_add_1_commute:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0x1
+; GISEL-NEXT:    add w0, w1, w8
+; GISEL-NEXT:    ret
   %e = zext i1 %cond to i32
   %r = add i32 %y, %e
   ret i32 %r
 }
 
+define i32 @callee_signext_i1(i1 signext %0) {
+; CHECK-LABEL: callee_signext_i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: callee_signext_i1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ret
+  %r = sext i1 %0 to i32
+  ret i32 %r
+}
+
+define i32 @caller_signext_i1() {
+; CHECK-LABEL: caller_signext_i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w0, #-1
+; CHECK-NEXT:    bl callee_signext_i1
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: caller_signext_i1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISEL-NEXT:    .cfi_def_cfa_offset 16
+; GISEL-NEXT:    .cfi_offset w30, -16
+; GISEL-NEXT:    mov w8, #1
+; GISEL-NEXT:    sbfx w0, w8, #0, #1
+; GISEL-NEXT:    bl callee_signext_i1
+; GISEL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISEL-NEXT:    ret
+  %r = call i32 @callee_signext_i1(i1 signext true)
+  ret i32 %r
+}
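
For context, a minimal standalone sketch of the ABI contract the code changes above encode; it is not part of the patch, the file and function names below are hypothetical, and the RUN line mirrors the one added to bool-ext-inc.ll. With signext/zeroext on an i1, the widening is the caller's responsibility, so the callee should be able to fold a matching sext/zext into the incoming register; a plain i1 is only guaranteed zero-extended to 8 bits by the caller, per the AAPCS comment in lowerCall.

; i1-ext-abi-example.ll (hypothetical reproducer)
; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=1 | FileCheck %s

define i32 @takes_signext_i1(i1 signext %b) {
  ; The caller already materialized 0 or -1 in w0, so this sext
  ; should lower to a bare ret (see callee_signext_i1 above).
  %r = sext i1 %b to i32
  ret i32 %r
}

define i32 @calls_signext_i1() {
  ; The sign extension is emitted here, on the caller side
  ; (mov w8, #1 + sbfx under GlobalISel in the checks above).
  %r = call i32 @takes_signext_i1(i1 signext true)
  ret i32 %r
}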