diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2605,13 +2605,44 @@
     //  %v2(s32) = G_ZEXT %v(s8)
     if (!IsSigned) {
       auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
-      if (LoadMI &&
-          RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
+      bool IsGPR =
+          RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
+      if (LoadMI && IsGPR) {
         const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
         unsigned BytesLoaded = MemOp->getSize();
         if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
           return selectCopy(I, TII, MRI, TRI, RBI);
       }
+
+      // If we are zero extending from 32 bits to 64 bits, it's possible that
+      // the instruction implicitly does the zero extend for us. In that case,
+      // we can just emit a SUBREG_TO_REG.
+      if (IsGPR && SrcSize == 32 && DstSize == 64) {
+        // Unlike with the G_LOAD case, we don't want to look through copies
+        // here.
+        MachineInstr *Def = MRI.getVRegDef(SrcReg);
+        if (Def && isDef32(*Def)) {
+          MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
+              .addImm(0)
+              .addUse(SrcReg)
+              .addImm(AArch64::sub_32);
+
+          if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
+                                            MRI)) {
+            LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
+            return false;
+          }
+
+          if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
+                                            MRI)) {
+            LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
+            return false;
+          }
+
+          I.eraseFromParent();
+          return true;
+        }
+      }
     }

     if (DstSize == 64) {
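The requirement behind the hunk above is that the 32-bit defining instruction, once selected, performs a real W-register write, which on AArch64 implicitly zeroes bits 63:32 of the containing X register. The sketch below is an illustrative approximation of that predicate, not the in-tree isDef32 helper; the rejected opcodes are inferred from the dont_fold_* tests added below, which show that copies, bitcasts, truncs, and phis may lower to nothing and therefore cannot be trusted to clear the high bits.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// Illustrative sketch only (not the in-tree isDef32): accept a 32-bit def
// when selecting it is guaranteed to produce a W-register write.
static bool sketchIsZeroingDef32(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  // These may be selected to plain register renames (or to nothing at all),
  // so stale high bits of the source can survive. The dont_fold_* tests
  // below cover each of these cases.
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  default:
    // Anything else producing a 32-bit GPR value is assumed to be selected
    // to an instruction that writes a W register and zeroes the upper half.
    return true;
  }
}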
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
@@ -59,15 +59,14 @@
   ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY $w0
   ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY $wzr
   ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 4, 0, implicit-def $nzcv
-  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[SUBSWri]], %subreg.sub_32
-  ; CHECK:   [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[SUBREG_TO_REG]], 0, 31
-  ; CHECK:   $xzr = SUBSXri [[UBFMXri]], 71, 0, implicit-def $nzcv
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[SUBSWri]], %subreg.sub_32
+  ; CHECK:   $xzr = SUBSXri [[SUBREG_TO_REG]], 71, 0, implicit-def $nzcv
   ; CHECK:   Bcc 8, %bb.4, implicit $nzcv
   ; CHECK: bb.1.entry:
   ; CHECK:   successors: %bb.3(0x2aaaaaab), %bb.4(0x2aaaaaab), %bb.2(0x2aaaaaab)
   ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
   ; CHECK:   [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
-  ; CHECK:   early-clobber %18:gpr64, early-clobber %19:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[UBFMXri]], %jump-table.0
+  ; CHECK:   early-clobber %18:gpr64, early-clobber %19:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0
   ; CHECK:   BR %18
   ; CHECK: bb.2.sw.bb:
   ; CHECK:   successors: %bb.4(0x80000000)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
@@ -0,0 +1,204 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: fold
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; This should not have an UBFMXri, since ADDWrr implicitly gives us the
+    ; zext.
+
+    ; CHECK-LABEL: name: fold
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ADDWrr]], %subreg.sub_32
+    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(s32) = G_ADD %1, %0
+    %3:gpr(s64) = G_ZEXT %2(s32)
+    $x0 = COPY %3(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; We should have a UBFMXri here, because we only do this for zero extends
+    ; from 32 bits to 64 bits.
+
+    ; CHECK-LABEL: name: dont_fold_s16
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[DEF]], %subreg.sub_32
+    ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 15
+    ; CHECK: $x0 = COPY [[UBFMXri]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(s16) = G_IMPLICIT_DEF
+    %3:gpr(s64) = G_ZEXT %0(s16)
+    $x0 = COPY %3(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_copy
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+
+    ; We should have a UBFMXri here, because isDef32 disallows copies.
+
+    ; CHECK-LABEL: name: dont_fold_copy
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32
+    ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: $x0 = COPY %zext
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:gpr(s32) = COPY $w0
+    %zext:gpr(s64) = G_ZEXT %copy(s32)
+    $x0 = COPY %zext(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_bitcast
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+
+    ; We should have a UBFMXri here, because isDef32 disallows bitcasts.
+
+    ; CHECK-LABEL: name: dont_fold_bitcast
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32all = COPY $w0
+    ; CHECK: %bitcast:gpr32 = COPY %copy
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %bitcast, %subreg.sub_32
+    ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: $x0 = COPY %zext
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:gpr(s32) = COPY $w0
+    %bitcast:gpr(s32) = G_BITCAST %copy(s32)
+    %zext:gpr(s64) = G_ZEXT %bitcast(s32)
+    $x0 = COPY %zext(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_trunc
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; We should have a UBFMXri here, because isDef32 disallows truncs.
+
+    ; CHECK-LABEL: name: dont_fold_trunc
+    ; CHECK: liveins: $x0
+    ; CHECK: %copy:gpr64sp = COPY $x0
+    ; CHECK: %trunc:gpr32common = COPY %copy.sub_32
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %trunc, %subreg.sub_32
+    ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: $x0 = COPY %zext
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:gpr(s64) = COPY $x0
+    %trunc:gpr(s32) = G_TRUNC %copy(s64)
+    %zext:gpr(s64) = G_ZEXT %trunc(s32)
+    $x0 = COPY %zext(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_fold_phi
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: dont_fold_phi
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $w0, $w1, $w2
+  ; CHECK:   %copy1:gpr32all = COPY $w0
+  ; CHECK:   %copy2:gpr32all = COPY $w1
+  ; CHECK:   %cond_wide:gpr32 = COPY $w2
+  ; CHECK:   TBNZW %cond_wide, 0, %bb.1
+  ; CHECK:   B %bb.2
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK: bb.2:
+  ; CHECK:   %phi:gpr32 = PHI %copy1, %bb.0, %copy2, %bb.1
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %phi, %subreg.sub_32
+  ; CHECK:   [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+  ; CHECK:   $x0 = COPY [[UBFMXri]]
+  ; CHECK:   RET_ReallyLR implicit $x0
+  ; We should have a UBFMXri here, because isDef32 disallows phis.
+
+  bb.0:
+    liveins: $w0, $w1, $w2
+
+    %copy1:gpr(s32) = COPY $w0
+    %copy2:gpr(s32) = COPY $w1
+    %cond_wide:gpr(s32) = COPY $w2
+    %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    G_BRCOND %cond(s1), %bb.1
+    G_BR %bb.2
+
+  bb.1:
+
+  bb.2:
+    %phi:gpr(s32) = G_PHI %copy1(s32), %bb.0, %copy2(s32), %bb.1
+    %5:gpr(s64) = G_ZEXT %phi(s32)
+    $x0 = COPY %5(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_look_through_copy
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; Make sure we don't walk past the copy.
+
+    ; CHECK-LABEL: name: dont_look_through_copy
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ADDWrr]], %subreg.sub_32
+    ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: $x0 = COPY [[UBFMXri]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(s32) = G_ADD %1, %0
+    %3:gpr(s32) = COPY %2(s32)
+    %4:gpr(s64) = G_ZEXT %3(s32)
+    $x0 = COPY %4(s64)
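At the source level, the pattern exercised by the fold test corresponds to widening the result of a 32-bit operation. The example below is a sketch of that pattern; the function name is illustrative and the codegen noted in the comments is an expectation under this fold, not verified output from the patch.

#include <cstdint>

// Sketch of the source pattern behind the "fold" test: a 32-bit add whose
// result is zero-extended to 64 bits. Writing a W register already zeroes
// bits 63:32 of the corresponding X register, so the G_ZEXT needs no
// UBFMXri once the def is known to be a genuine 32-bit instruction; a
// SUBREG_TO_REG, which emits no code, is enough.
uint64_t widen_add(uint32_t a, uint32_t b) {
  return static_cast<uint64_t>(a + b);
  // Expected (approximate) codegen with the fold:
  //   add w0, w0, w1
  //   ret
  // rather than an add followed by an explicit zero extension such as
  //   ubfx x0, x8, #0, #32
}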