diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -825,9 +825,17 @@
     Reg = N.getOperand(0);
 
-    // Don't match if free 32-bit -> 64-bit zext can be used instead.
-    if (Ext == AArch64_AM::UXTW &&
-        Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
+    // Don't match if free 32-bit -> 64-bit zext can be used instead. Use
+    // isDef32 as a heuristic for when the operand is likely a 32-bit def.
+    auto isDef32 = [](SDValue N) {
+      unsigned Opc = N.getOpcode();
+      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
+             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
+             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
+             Opc != ISD::FREEZE;
+    };
+    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
+        isDef32(Reg))
       return false;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -457,23 +457,6 @@
 } // end namespace AArch64ISD
 
-namespace {
-
-// Any instruction that defines a 32-bit result zeros out the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. But any other 32-bit operation will zero-extend
-// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
-// 32 bits, they're probably just qualifying a CopyFromReg.
-static inline bool isDef32(const SDNode &N) {
-  unsigned Opc = N.getOpcode();
-  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
-         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
-         Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
-         Opc != ISD::FREEZE;
-}
-
-} // end anonymous namespace
-
 namespace AArch64 {
 /// Possible values of current rounding mode, which is specified in bits
 /// 23:22 of FPCR.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7257,14 +7257,6 @@
 //----------------------------------------------------------------------------
 // FIXME: Like for X86, these should go in their own separate .td file.
 
-def def32 : PatLeaf<(i32 GPR32:$src), [{
-  return isDef32(*N);
-}]>;
-
-// In the case of a 32-bit def that is known to implicitly zero-extend,
-// we can use a SUBREG_TO_REG.
-def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
-
 // For an anyext, we don't care what the high bits are, so we can perform an
 // INSERT_SUBREF into an IMPLICIT_DEF.
 def : Pat<(i64 (anyext GPR32:$src)),
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -221,7 +221,29 @@
   // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
   // real AArch64 instruction and if it is not, do not process the opcode
   // conservatively.
-  if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
+  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
+      SrcMI->getOperand(1).getReg().isVirtual()) {
+    const TargetRegisterClass *RC =
+        MRI->getRegClass(SrcMI->getOperand(1).getReg());
+
+    // A COPY from an FPR will become an FMOVSWr, so convert it now so that we
+    // know the upper bits are zero.
+    if (RC != &AArch64::FPR32RegClass &&
+        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
+         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
+      return false;
+    Register CpySrc = SrcMI->getOperand(1).getReg();
+    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
+      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
+      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), CpySrc)
+          .add(SrcMI->getOperand(1));
+    }
+    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
+            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
+        .addReg(CpySrc);
+    ToBeRemoved.insert(SrcMI);
+  } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
     return false;
 
   Register DefReg = MI.getOperand(0).getReg();
diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -35,8 +35,8 @@
 ; CHECK-LABEL: cnt32_advsimd_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    cnt.8b v0, v0
 ; CHECK-NEXT:    uaddlv.8b h0, v0
 ; CHECK-NEXT:    fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
--- a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -328,7 +328,8 @@
 define i64 @sign_extend_inreg_isdef32(i64) {
 ; CHECK-LABEL: sign_extend_inreg_isdef32:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    sbfx x0, x0, #32, #16
+; CHECK-NEXT:    sbfx x8, x0, #32, #16
+; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    ret
   %2 = lshr i64 %0, 32
   %3 = shl i64 %2, 16
diff --git a/llvm/test/CodeGen/AArch64/dp1.ll b/llvm/test/CodeGen/AArch64/dp1.ll
--- a/llvm/test/CodeGen/AArch64/dp1.ll
+++ b/llvm/test/CodeGen/AArch64/dp1.ll
@@ -246,7 +246,6 @@
 ; CHECK-GISEL-NEXT:    cnt v0.8b, v0.8b
 ; CHECK-GISEL-NEXT:    uaddlv h0, v0.8b
 ; CHECK-GISEL-NEXT:    fmov w9, s0
-; CHECK-GISEL-NEXT:    mov w9, w9
 ; CHECK-GISEL-NEXT:    str x9, [x8]
 ; CHECK-GISEL-NEXT:    ret
   %val0_tmp = load i64, i64* @var64
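
For illustration only (not part of the patch): a minimal IR sketch, written in the style of the tests above, of the pattern the new COPY handling in AArch64MIPeepholeOpt targets. The function name and CHECK lines below are hypothetical. Assuming the peephole fires, the bitcast's COPY from s0 is rewritten to an fmov (FMOVSWr), whose 32-bit result is known to zero bits 63:32, so the zext should need no separate zero-extending mov:

define i64 @zext_bitcast(float %f) {
; CHECK-LABEL: zext_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %b = bitcast float %f to i32
  %z = zext i32 %b to i64
  ret i64 %z
}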