2020-12-20 17:55:56 +01:00
|
|
|
from pathlib import Path
|
2021-05-18 05:51:33 +02:00
|
|
|
|
2021-03-11 12:33:54 +01:00
|
|
|
import polyglot
|
2021-05-18 05:51:33 +02:00
|
|
|
from argostranslate import package, translate
|
|
|
|
|
|
|
|
import app.language
|
2020-12-19 23:40:37 +01:00
|
|
|
|
2021-05-18 05:41:02 +02:00
|
|
|
|
2021-03-28 16:19:19 +02:00
|
|
|
def boot(load_only=None):
    """Install translation models and transliteration data on a best-effort basis.

    Args:
        load_only: Optional collection of language codes, forwarded to
            ``check_and_install_models`` as ``load_only_lang_codes``.
            ``None`` applies no language restriction.

    Any exception raised by either installation step is caught and reported
    on stdout instead of being propagated to the caller.
    """
    try:
        check_and_install_models(load_only_lang_codes=load_only)
        check_and_install_transliteration()
    except Exception as error:
        # Deliberately broad: a failed update must not abort the caller.
        message = "Cannot update models (normal if you're offline): %s" % (
            str(error),
        )
        print(message)
|
2020-12-19 23:40:37 +01:00
|
|
|
|
2021-05-18 05:41:02 +02:00
|
|
|
|
2021-03-28 16:19:19 +02:00
|
|
|
def check_and_install_models(force=False, load_only_lang_codes=None):
    """Download and install translation models, then reload the languages.

    The update only runs when fewer than two packages are installed or when
    ``force`` is true; otherwise the function returns without doing anything.

    Args:
        force: Run the update even if two or more packages are installed.
        load_only_lang_codes: Optional iterable of language codes. When
            given, only packages whose ``from_code`` and ``to_code`` are both
            listed are installed. ``None`` installs every available package.

    Raises:
        ValueError: If a requested code is not offered by any available
            package, or if no package remains after filtering.
    """
    too_few_installed = len(package.get_installed_packages()) < 2
    if not (too_few_installed or force):
        return

    # Update package definitions from remote
    print("Updating language models")
    package.update_package_index()

    # Load available packages from local package index
    available_packages = package.load_available_packages()
    print("Found %s models" % len(available_packages))

    if load_only_lang_codes is not None:
        # Materialise the requested codes once: this keeps one-shot iterables
        # usable for the filter below and makes each membership test O(1).
        requested_codes = set(load_only_lang_codes)

        # Ensure the user does not use any unavailable language code.
        offered_codes = set()
        for pack in available_packages:
            offered_codes.update((pack.from_code, pack.to_code))
        unavailable_lang_codes = requested_codes - offered_codes
        if unavailable_lang_codes:
            raise ValueError(
                "Unavailable language codes: %s."
                % ",".join(sorted(unavailable_lang_codes))
            )

        # Keep only the packages that have both from_code and to_code in our list.
        available_packages = [
            pack
            for pack in available_packages
            if pack.from_code in requested_codes
            and pack.to_code in requested_codes
        ]
        if not available_packages:
            raise ValueError("no available package")
        print("Keep %s models" % len(available_packages))

    # Download and install all available packages
    for available_package in available_packages:
        print(
            "Downloading %s (%s) ..."
            % (available_package, available_package.package_version)
        )
        download_path = available_package.download()
        package.install_from_path(download_path)

    # Reload installed languages once and reuse the result for the summary
    # instead of loading them a second time just to count them.
    installed_languages = translate.load_installed_languages()
    app.language.languages = installed_languages
    print(
        "Loaded support for %s languages (%s models total)!"
        % (len(installed_languages), len(available_packages))
    )
|
2021-03-11 12:33:54 +01:00
|
|
|
|
|
|
|
|
|
|
|
def check_and_install_transliteration(force=False):
    """Download polyglot transliteration models for the loaded languages.

    Args:
        force: When true, download the model for every candidate language
            without checking the disk. When false, download only those whose
            ``transliteration.<code>.tar.bz2`` archive is missing under
            ``<polyglot_path>/transliteration2/<code>/``.
    """
    # 'en' is not a supported transliteration language
    candidate_codes = [
        language.code
        for language in app.language.languages
        if language.code != "en"
    ]

    if force:
        missing_codes = candidate_codes
    else:
        # Only languages whose archive is absent on disk need installing.
        models_root = Path(polyglot.polyglot_path) / "transliteration2"
        missing_codes = [
            code
            for code in candidate_codes
            if not (models_root / code / f"transliteration.{code}.tar.bz2").exists()
        ]

    # Nothing to install: return before importing the downloader.
    if not missing_codes:
        return

    joined_codes = ", ".join(missing_codes)
    print(
        f"Installing transliteration models for the following languages: {joined_codes}"
    )

    from polyglot.downloader import Downloader

    fetcher = Downloader()
    for code in missing_codes:
        fetcher.download(f"transliteration2.{code}")
|