diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -3269,24 +3269,12 @@
 
     // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
     // + SUBREG_TO_REG.
-    //
-    // If we are zero extending from 32 bits to 64 bits, it's possible that
-    // the instruction implicitly does the zero extend for us. In that case,
-    // we only need the SUBREG_TO_REG.
     if (IsGPR && SrcSize == 32 && DstSize == 64) {
-      // Unlike with the G_LOAD case, we don't want to look through copies
-      // here. (See isDef32.)
-      MachineInstr *Def = MRI.getVRegDef(SrcReg);
-      Register SubregToRegSrc = SrcReg;
-
-      // Does the instruction implicitly zero extend?
-      if (!Def || !isDef32(*Def)) {
-        // No. Zero out using an OR.
-        Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-        const Register ZReg = AArch64::WZR;
-        MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
-        SubregToRegSrc = OrDst;
-      }
+      Register SubregToRegSrc =
+          MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+      const Register ZReg = AArch64::WZR;
+      MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
+          .addImm(0);
 
       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
           .addImm(0)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
@@ -424,7 +424,8 @@
     ; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
     ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
     ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
-    ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %cmp, 0
+    ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs
     ; CHECK-NEXT: %or:gpr64 = ORRXrr %add, %cmp_ext
     ; CHECK-NEXT: $x0 = COPY %or
@@ -459,7 +460,8 @@
     ; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
     ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
     ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
-    ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %cmp, 0
+    ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
     ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], %cmp, %subreg.sub_32
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
@@ -60,7 +60,8 @@
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 4, 0, implicit-def $nzcv
-    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[SUBSWri]], %subreg.sub_32
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[SUBSWri]], 0
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK-NEXT: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[SUBREG_TO_REG]], 71, 0, implicit-def $nzcv
     ; CHECK-NEXT: Bcc 8, %bb.4, implicit $nzcv
     ; CHECK-NEXT: {{  $}}
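With this change, a 32-bit to 64-bit G_ZEXT on a GPR always selects to an explicit zeroing move followed by the subregister insertion; the isDef32 shortcut that skipped the move is gone. A minimal sketch of the sequence now emitted (the %src/%zeroed/%ext names are illustrative, not taken from the tests):

  %zeroed:gpr32 = ORRWrs $wzr, %src, 0                  ; 32-bit mov: writing a W register zeroes bits 63:32
  %ext:gpr64 = SUBREG_TO_REG 0, %zeroed, %subreg.sub_32 ; the zero-high-bits claim is now backed by a real def

The test updates below are mechanical: wherever a bare SUBREG_TO_REG was previously expected, an ORRWrs is now checked in front of it.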
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
@@ -16,12 +16,14 @@
 
     ; CHECK-LABEL: name: fold
     ; CHECK: liveins: $w0, $w1
-    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
-    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ADDWrr]], %subreg.sub_32
-    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
-    ; CHECK: RET_ReallyLR implicit $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWrr]], 0
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-NEXT: $x0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:gpr(s32) = COPY $w0
     %1:gpr(s32) = COPY $w1
     %2:gpr(s32) = G_ADD %1, %0
@@ -44,11 +46,12 @@
 
     ; CHECK-LABEL: name: dont_fold_s16
     ; CHECK: liveins: $w0, $w1
-    ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[DEF]], %subreg.sub_32
-    ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 15
-    ; CHECK: $x0 = COPY [[UBFMXri]]
-    ; CHECK: RET_ReallyLR implicit $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[DEF]], %subreg.sub_32
+    ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 15
+    ; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:gpr(s16) = G_IMPLICIT_DEF
     %3:gpr(s64) = G_ZEXT %0(s16)
     $x0 = COPY %3(s64)
@@ -68,11 +71,12 @@
 
     ; CHECK-LABEL: name: dont_fold_copy
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %copy, 0
-    ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK: $x0 = COPY %zext
-    ; CHECK: RET_ReallyLR implicit $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %copy, 0
+    ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-NEXT: $x0 = COPY %zext
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %copy:gpr(s32) = COPY $w0
     %zext:gpr(s64) = G_ZEXT %copy(s32)
     $x0 = COPY %zext(s64)
@@ -92,12 +96,13 @@
 
     ; CHECK-LABEL: name: dont_fold_bitcast
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32all = COPY $w0
-    ; CHECK: %bitcast1:gpr32 = COPY %copy
-    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %bitcast1, 0
-    ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK: $x0 = COPY %zext
-    ; CHECK: RET_ReallyLR implicit $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32all = COPY $w0
+    ; CHECK-NEXT: %bitcast1:gpr32 = COPY %copy
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %bitcast1, 0
+    ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-NEXT: $x0 = COPY %zext
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %copy:gpr(s32) = COPY $w0
     %bitcast0:gpr(<4 x s8>) = G_BITCAST %copy(s32)
     %bitcast1:gpr(s32) = G_BITCAST %bitcast0
@@ -119,12 +124,13 @@
 
     ; CHECK-LABEL: name: dont_fold_trunc
     ; CHECK: liveins: $x0
-    ; CHECK: %copy:gpr64sp = COPY $x0
-    ; CHECK: %trunc:gpr32common = COPY %copy.sub_32
-    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %trunc, 0
-    ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK: $x0 = COPY %zext
-    ; CHECK: RET_ReallyLR implicit $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr64sp = COPY $x0
+    ; CHECK-NEXT: %trunc:gpr32common = COPY %copy.sub_32
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %trunc, 0
+    ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-NEXT: $x0 = COPY %zext
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %copy:gpr(s64) = COPY $x0
     %trunc:gpr(s32) = G_TRUNC %copy(s64)
     %zext:gpr(s64) = G_ZEXT %trunc(s32)
@@ -140,21 +146,25 @@
 
   body:             |
     ; CHECK-LABEL: name: dont_fold_phi
    ; CHECK: bb.0:
-    ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
-    ; CHECK: liveins: $w0, $w1, $w2
-    ; CHECK: %copy1:gpr32all = COPY $w0
-    ; CHECK: %copy2:gpr32all = COPY $w1
-    ; CHECK: %cond_wide:gpr32 = COPY $w2
-    ; CHECK: TBNZW %cond_wide, 0, %bb.1
-    ; CHECK: B %bb.2
-    ; CHECK: bb.1:
-    ; CHECK: successors: %bb.2(0x80000000)
-    ; CHECK: bb.2:
-    ; CHECK: %phi:gpr32 = PHI %copy1, %bb.0, %copy2, %bb.1
-    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %phi, 0
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
-    ; CHECK: RET_ReallyLR implicit $x0
+    ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    ; CHECK-NEXT: liveins: $w0, $w1, $w2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy1:gpr32all = COPY $w0
+    ; CHECK-NEXT: %copy2:gpr32all = COPY $w1
+    ; CHECK-NEXT: %cond_wide:gpr32 = COPY $w2
+    ; CHECK-NEXT: TBNZW %cond_wide, 0, %bb.1
+    ; CHECK-NEXT: B %bb.2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: bb.1:
+    ; CHECK-NEXT: successors: %bb.2(0x80000000)
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: bb.2:
+    ; CHECK-NEXT: %phi:gpr32 = PHI %copy1, %bb.0, %copy2, %bb.1
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %phi, 0
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-NEXT: $x0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
 
     ; We should have a ORRWrs here, because isDef32 disallows phis.
    bb.0:
@@ -188,13 +198,14 @@
 
     ; CHECK-LABEL: name: dont_look_through_copy
     ; CHECK: liveins: $w0, $w1
-    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
-    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
-    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWrr]], 0
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
-    ; CHECK: RET_ReallyLR implicit $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWrr]], 0
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-NEXT: $x0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:gpr(s32) = COPY $w0
     %1:gpr(s32) = COPY $w1
     %2:gpr(s32) = G_ADD %1, %0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -574,7 +574,8 @@
     ; CHECK-NEXT: %cond:gpr32common = COPY %reg0.sub_32
     ; CHECK-NEXT: %t:gpr64 = COPY $x2
     ; CHECK-NEXT: %negative_one:gpr32 = MOVi32imm -1
-    ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, %negative_one, %subreg.sub_32
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %negative_one, 0
+    ; CHECK-NEXT: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK-NEXT: %xor:gpr64 = EORXrr %reg1, %zext
     ; CHECK-NEXT: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %cond, 0, implicit-def $nzcv
     ; CHECK-NEXT: %select:gpr64 = CSELXr %t, %xor, 1, implicit $nzcv
diff --git a/llvm/test/CodeGen/AArch64/pr58431.ll b/llvm/test/CodeGen/AArch64/pr58431.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr58431.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=0 | FileCheck %s
+
+define i32 @f(i64 %0) {
+; CHECK-LABEL: f:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    mov w9, w0
+; CHECK-NEXT:    udiv x10, x9, x8
+; CHECK-NEXT:    msub x0, x10, x8, x9
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %2 = trunc i64 %0 to i32
+  %3 = freeze i32 %2
+  %4 = zext i32 %3 to i64
+  %5 = urem i64 %4, 10
+  %6 = trunc i64 %5 to i32
+  ret i32 %6
+}
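Why the new pr58431.ll test needed the unconditional ORRWrs: the freeze in @f is selected to a plain copy of the truncated sub-register, so under the old isDef32 reasoning the zext could become a bare SUBREG_TO_REG with nothing actually clearing bits 63:32 before the 64-bit urem; in the CHECK lines above it is the mov w9, w0 (the ORRWrs) that performs the real zero-extension. A worked example with the hypothetical argument 0x100000007 (not part of the test, just arithmetic):

  correct:      trunc gives 7, zext gives 7, and 7 urem 10 = 7
  miscompiled:  bits 63:32 survive, so the udiv/msub compute 0x100000007 urem 10 = 4294967303 % 10 = 3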