Index: llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -608,23 +608,16 @@
   return true;
 }
 
-static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI) {
-  // ToDo: check and add more MIs which set zero for high 64bits.
-  switch (MI->getOpcode()) {
-  default:
-    break;
-  case AArch64::FCVTNv2i32:
-  case AArch64::FCVTNv4i16:
-  case AArch64::RSHRNv2i32_shift:
-  case AArch64::RSHRNv4i16_shift:
-  case AArch64::RSHRNv8i8_shift:
-  case AArch64::SHRNv2i32_shift:
-  case AArch64::SHRNv4i16_shift:
-  case AArch64::SHRNv8i8_shift:
-    return true;
-  }
-
-  return false;
+// All instructions that set an FPR64 will implicitly zero the top bits of the
+// register.
+static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
+                                        MachineRegisterInfo *MRI) {
+  if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
+    return false;
+  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
+  if (RC != &AArch64::FPR64RegClass)
+    return false;
+  return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
 }
 
 bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
@@ -639,7 +632,7 @@
   if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
     return false;
   Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
-  if (!is64bitDefwithZeroHigh64bit(Low64MI))
+  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
     return false;
 
   // Check there is `mov 0` MI for high 64-bits.
@@ -656,13 +649,13 @@
   //   %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
   //   %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
   MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
-  if (High64MI->getOpcode() != AArch64::INSERT_SUBREG)
+  if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
     return false;
   High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
-  if (High64MI->getOpcode() == TargetOpcode::COPY)
+  if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
     High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
-  if (High64MI->getOpcode() != AArch64::MOVID &&
-      High64MI->getOpcode() != AArch64::MOVIv2d_ns)
+  if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
+                    High64MI->getOpcode() != AArch64::MOVIv2d_ns))
     return false;
   if (High64MI->getOperand(1).getImm() != 0)
     return false;
Index: llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
===================================================================
--- llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -191,13 +191,11 @@
 ; CHECK-NEXT:    cmp x0, #0
 ; CHECK-NEXT:    ldr q0, [x2]
 ; CHECK-NEXT:    cset w8, gt
-; CHECK-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-NEXT:    cmtst v0.8h, v0.8h, v0.8h
 ; CHECK-NEXT:    dup v1.8h, w8
 ; CHECK-NEXT:    cmeq v1.8h, v1.8h, #0
 ; CHECK-NEXT:    bic v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    str q0, [x1]
 ; CHECK-NEXT:    ret
   %tmp = xor <16 x i1> zeroinitializer,
Index: llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll
===================================================================
--- llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll
+++ llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll
@@ -51,9 +51,7 @@
 define <8 x i16> @addpv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
 ; CHECK-LABEL: addpv4i16:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-NEXT:    addp v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %vpadd_v2.i = tail call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
@@ -64,9 +62,7 @@
 define <8 x i16> @addv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
 ; CHECK-LABEL: addv4i16:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-NEXT:    add v0.4h, v1.4h, v0.4h
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %add.i = add <4 x i16> %b, %a
@@ -88,9 +84,7 @@
 define <16 x i8> @tbl1(<16 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: tbl1:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %vtbl11 = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b)
@@ -116,9 +110,7 @@
 define <16 x i8> @bsl(<4 x i16> noundef %a, <4 x i16> noundef %c, <4 x i16> noundef %d, <4 x i16> noundef %b) {
 ; CHECK-LABEL: bsl:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT:    movi v3.2d, #0000000000000000
 ; CHECK-NEXT:    bsl v0.8b, v1.8b, v2.8b
-; CHECK-NEXT:    mov v0.d[1], v3.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %vbsl3.i = and <4 x i16> %c, %a
Index: llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
===================================================================
--- llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
+++ llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
@@ -434,8 +434,7 @@
     ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[COPY2]], %subreg.dsub
     ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
     ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[FADDDrr]], %subreg.dsub
-    ; CHECK-NEXT: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG1]], 1, killed [[INSERT_SUBREG]], 0
-    ; CHECK-NEXT: $q0 = COPY [[INSvi64lane]]
+    ; CHECK-NEXT: $q0 = COPY [[INSERT_SUBREG1]]
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %1:fpr64 = COPY $d1
     %0:fpr64 = COPY $d0
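
Note (not part of the patch): on AArch64, any instruction that writes a 64-bit FP/SIMD register zeroes bits [127:64] of the corresponding 128-bit Q register, so the explicit movi + mov pair that materializes zero high lanes is redundant whenever the low 64 bits come from such a def. A minimal IR sketch of the kind of newly covered input, modeled on the addv4i16 test above; the function and value names are illustrative only:

define <8 x i16> @zext_concat_after_add(<4 x i16> %a) {
entry:
  ; The 64-bit vector add writes d0 and implicitly clears the high half of
  ; q0, so the generalized peephole can drop the
  ;   movi vN.2d, #0000000000000000
  ;   mov  v0.d[1], vN.d[0]
  ; sequence that the old opcode allowlist left behind for plain adds.
  %add = add <4 x i16> %a, %a
  %wide = shufflevector <4 x i16> %add, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %wide
}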