diff --git a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp --- a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp +++ b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -165,7 +165,7 @@ switch (Opc) { default: break; - // FIXME: Lots more possibilities. + // FIXME: Support mul-add operations. case AArch64::ADDXrr: return AArch64::ADDv1i64; case AArch64::SUBXrr: @@ -176,6 +176,20 @@ return AArch64::EORv8i8; case AArch64::ORRXrr: return AArch64::ORRv8i8; + case AArch64::BICXrr: + return AArch64::BICv8i8; + case AArch64::ORNXrr: + return AArch64::ORNv8i8; + case AArch64::LSLVXr: + return AArch64::USHLv1i64; + case AArch64::RBITXr: + return AArch64::RBITv8i8; + case AArch64::REV16Xr: + return AArch64::REV16v8i8; + case AArch64::REV32Xr: + return AArch64::REV32v8i8; + case AArch64::REVXr: + return AArch64::REV64v8i8; } // No AdvSIMD equivalent, so just return the original opcode. return Opc; @@ -201,34 +215,22 @@ unsigned NumNewCopies = 3; unsigned NumRemovableCopies = 0; - Register OrigSrc0 = MI.getOperand(1).getReg(); - Register OrigSrc1 = MI.getOperand(2).getReg(); - unsigned SubReg0; - unsigned SubReg1; - if (!MRI->def_empty(OrigSrc0)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc0); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - MachineOperand *MOSrc0 = getSrcFromCopy(&*Def, MRI, SubReg0); - // If the source was from a copy, we don't need to insert a new copy. - if (MOSrc0) - --NumNewCopies; - // If there are no other users of the original source, we can delete - // that instruction. - if (MOSrc0 && MRI->hasOneNonDBGUse(OrigSrc0)) - ++NumRemovableCopies; - } - if (!MRI->def_empty(OrigSrc1)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc1); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - MachineOperand *MOSrc1 = getSrcFromCopy(&*Def, MRI, SubReg1); - if (MOSrc1) - --NumNewCopies; - // If there are no other users of the original source, we can delete - // that instruction. - if (MOSrc1 && MRI->hasOneNonDBGUse(OrigSrc1)) - ++NumRemovableCopies; + for (unsigned i = 1, e = MI.getNumExplicitOperands(); i < e; ++i) { + Register OrigSrc = MI.getOperand(i).getReg(); + unsigned SubReg; + if (!MRI->def_empty(OrigSrc)) { + MachineRegisterInfo::def_instr_iterator Def = + MRI->def_instr_begin(OrigSrc); + assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); + MachineOperand *MOSrc = getSrcFromCopy(&*Def, MRI, SubReg); + // If the source was from a copy, we don't need to insert a new copy. + if (MOSrc) + --NumNewCopies; + // If there are no other users of the original source, we can delete + // that instruction. + if (MOSrc && MRI->hasOneNonDBGUse(OrigSrc)) + ++NumRemovableCopies; + } } // If any of the uses of the original instructions is a cross class copy, @@ -293,75 +295,56 @@ assert(OldOpc != NewOpc && "transform an instruction to itself?!"); // Check if we need a copy for the source registers. - Register OrigSrc0 = MI.getOperand(1).getReg(); - Register OrigSrc1 = MI.getOperand(2).getReg(); - unsigned Src0 = 0, SubReg0; - unsigned Src1 = 0, SubReg1; - bool KillSrc0 = false, KillSrc1 = false; - if (!MRI->def_empty(OrigSrc0)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc0); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - MachineOperand *MOSrc0 = getSrcFromCopy(&*Def, MRI, SubReg0); - // If there are no other users of the original source, we can delete - // that instruction. - if (MOSrc0) { - Src0 = MOSrc0->getReg(); - KillSrc0 = MOSrc0->isKill(); - // Src0 is going to be reused, thus, it cannot be killed anymore. - MOSrc0->setIsKill(false); - if (MRI->hasOneNonDBGUse(OrigSrc0)) { - assert(MOSrc0 && "Can't delete copy w/o a valid original source!"); - Def->eraseFromParent(); - ++NumCopiesDeleted; + SmallVector Src; + SmallVector SubReg; + SmallVector KillSrc; + for (unsigned i = 1, e = MI.getNumExplicitOperands(); i < e; ++i) { + Register OrigSrc = MI.getOperand(i).getReg(); + Src.push_back(0); + SubReg.push_back(0); + KillSrc.push_back(false); + if (!MRI->def_empty(OrigSrc)) { + MachineRegisterInfo::def_instr_iterator Def = + MRI->def_instr_begin(OrigSrc); + assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); + MachineOperand *MOSrc = getSrcFromCopy(&*Def, MRI, SubReg.back()); + // If there are no other users of the original source, we can delete + // that instruction. + if (MOSrc) { + Src.back() = MOSrc->getReg(); + KillSrc.back() = MOSrc->isKill(); + // Src0 is going to be reused, thus, it cannot be killed anymore. + MOSrc->setIsKill(false); + if (MRI->hasOneNonDBGUse(OrigSrc)) { + assert(MOSrc && "Can't delete copy w/o a valid original source!"); + Def->eraseFromParent(); + ++NumCopiesDeleted; + } } } - } - if (!MRI->def_empty(OrigSrc1)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc1); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - MachineOperand *MOSrc1 = getSrcFromCopy(&*Def, MRI, SubReg1); - // If there are no other users of the original source, we can delete - // that instruction. - if (MOSrc1) { - Src1 = MOSrc1->getReg(); - KillSrc1 = MOSrc1->isKill(); - // Src0 is going to be reused, thus, it cannot be killed anymore. - MOSrc1->setIsKill(false); - if (MRI->hasOneNonDBGUse(OrigSrc1)) { - assert(MOSrc1 && "Can't delete copy w/o a valid original source!"); - Def->eraseFromParent(); - ++NumCopiesDeleted; - } + + // If we weren't able to reference the original source directly, create a + // copy. + if (!Src.back()) { + SubReg.back() = 0; + Src.back() = MRI->createVirtualRegister(&AArch64::FPR64RegClass); + insertCopy(TII, MI, Src.back(), OrigSrc, KillSrc.back()); + KillSrc.back() = true; } } - // If we weren't able to reference the original source directly, create a - // copy. - if (!Src0) { - SubReg0 = 0; - Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); - insertCopy(TII, MI, Src0, OrigSrc0, KillSrc0); - KillSrc0 = true; - } - if (!Src1) { - SubReg1 = 0; - Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); - insertCopy(TII, MI, Src1, OrigSrc1, KillSrc1); - KillSrc1 = true; - } // Create a vreg for the destination. // FIXME: No need to do this if the ultimate user expects an FPR64. // Check for that and avoid the copy if possible. Register Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); - // For now, all of the new instructions have the same simple three-register + // For now, all of the new instructions have the same simple N-register // form, so no need to special case based on what instruction we're // building. - BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), Dst) - .addReg(Src0, getKillRegState(KillSrc0), SubReg0) - .addReg(Src1, getKillRegState(KillSrc1), SubReg1); + MachineInstrBuilder NewMI = BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), Dst); + for (unsigned i = 0, e = Src.size(); i < e; ++i) { + NewMI = NewMI.addReg(Src[i], getKillRegState(KillSrc[i]), SubReg[i]); + } // Now copy the result back out to a GPR. // FIXME: Try to avoid this if all uses could actually just use the FPR64 diff --git a/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll --- a/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll +++ b/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll @@ -86,6 +86,7 @@ %retval = bitcast i64 %sub.i to double ret double %retval } + define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: and_su64: ; CHECK: // %bb.0: @@ -103,6 +104,24 @@ ret double %retval } +define double @bic_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: bic_su64: +; CHECK: // %bb.0: +; CHECK-NEXT: bic.8b v0, v1, v0 +; CHECK-NEXT: ret +; +; GENERIC-LABEL: bic_su64: +; GENERIC: // %bb.0: +; GENERIC-NEXT: bic v0.8b, v1.8b, v0.8b +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %not = xor i64 %vecext, -1 + %or.i = and i64 %vecext1, %not + %retval = bitcast i64 %or.i to double + ret double %retval +} + define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: orr_su64: ; CHECK: // %bb.0: @@ -120,6 +139,24 @@ ret double %retval } +define double @orn_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: orn_su64: +; CHECK: // %bb.0: +; CHECK-NEXT: orn.8b v0, v0, v1 +; CHECK-NEXT: ret +; +; GENERIC-LABEL: orn_su64: +; GENERIC: // %bb.0: +; GENERIC-NEXT: orn v0.8b, v0.8b, v1.8b +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %not = xor i64 %vecext1, -1 + %or.i = or i64 %vecext, %not + %retval = bitcast i64 %or.i to double + ret double %retval +} + define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: xorr_su64: ; CHECK: // %bb.0: @@ -136,3 +173,94 @@ %retval = bitcast i64 %xor.i to double ret double %retval } + +define double @lsl_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: lsl_su64: +; CHECK: // %bb.0: +; CHECK-NEXT: ushl d0, d1, d0 +; CHECK-NEXT: ret +; +; GENERIC-LABEL: lsl_su64: +; GENERIC: // %bb.0: +; GENERIC-NEXT: ushl d0, d1, d0 +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %xor.i = shl i64 %vecext1, %vecext + %retval = bitcast i64 %xor.i to double + ret double %retval +} + +define double @rbit_su64(<2 x i64> %a) nounwind readnone { +; CHECK-LABEL: rbit_su64: +; CHECK: // %bb.0: +; CHECK-NEXT: rbit.8b v0, v0 +; CHECK-NEXT: ret +; +; GENERIC-LABEL: rbit_su64: +; GENERIC: // %bb.0: +; GENERIC-NEXT: rbit v0.8b, v0.8b +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %rev = tail call i64 @llvm.bitreverse.i64(i64 %vecext) + %retval = bitcast i64 %rev to double + ret double %retval +} + +define double @rev16_su64(<2 x i64> %i) nounwind readnone { +; CHECK-LABEL: rev16_su64: +; CHECK: // %bb.0: +; CHECK-NEXT: rev16.8b v0, v0 +; CHECK-NEXT: ret +; +; GENERIC-LABEL: rev16_su64: +; GENERIC: // %bb.0: +; GENERIC-NEXT: rev16 v0.8b, v0.8b +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %i, i32 0 + %a = lshr i64 %vecext, 8 + %b = and i64 %a, 71777214294589695 + %c = shl i64 %vecext, 8 + %d = and i64 %c, -71777214294589696 + %e = or i64 %b, %d + %retval = bitcast i64 %e to double + ret double %retval +} + +define double @rev32_su64(<2 x i64> %i) nounwind readnone { +; CHECK-LABEL: rev32_su64: +; CHECK: // %bb.0: +; CHECK-NEXT: rev32.8b v0, v0 +; CHECK-NEXT: ret +; +; GENERIC-LABEL: rev32_su64: +; GENERIC: // %bb.0: +; GENERIC-NEXT: rev32 v0.8b, v0.8b +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %i, i32 0 + %a = tail call i64 @llvm.bswap.i64(i64 %vecext) + %b = lshr i64 %a, 32 + %c = shl i64 %a, 32 + %d = or i64 %b, %c + %retval = bitcast i64 %d to double + ret double %retval +} + +define double @rev_su64(<2 x i64> %i) nounwind readnone { +; CHECK-LABEL: rev_su64: +; CHECK: // %bb.0: +; CHECK-NEXT: rev64.8b v0, v0 +; CHECK-NEXT: ret +; +; GENERIC-LABEL: rev_su64: +; GENERIC: // %bb.0: +; GENERIC-NEXT: rev64 v0.8b, v0.8b +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %i, i32 0 + %a = tail call i64 @llvm.bswap.i64(i64 %vecext) + %retval = bitcast i64 %a to double + ret double %retval +} + +declare i64 @llvm.bitreverse.i64(i64) +declare i64 @llvm.bswap.i64(i64)