Stirling-PDF/scripts/counter_translation.py

"""A script to update language progress status in README.md based on
properties file comparison.

This script compares the default properties file with the other properties
files in a directory to determine the translation progress of each language.
It then updates README.md based on the resulting progress list.

Author: Ludy87

Example:
    To use this script, simply run it from command line:
        $ python counter_translation.py
"""  # noqa: D205

import glob
import os
import re

import tomlkit
import tomlkit.toml_file


def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
    """Build a key-sorted copy of a TOML document with normalized arrays.

    Top-level keys are copied in sorted order. Every dict-like value is
    replaced by a fresh table that carries only its 'ignore' and 'missing'
    entries (any other sub-key is not copied); each of those entries becomes a
    multiline array whose items are de-duplicated and sorted.

    Parameters:
        data (tomlkit.TOMLDocument): The original TOML document containing the data.

    Returns:
        tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays.
    """
    result = tomlkit.document()
    for name in sorted(data.keys()):
        entry = data[name]

        # Non-table values are carried over exactly as they are.
        if not isinstance(entry, dict):
            result[name] = entry
            continue

        table = tomlkit.table()
        for field in ("ignore", "missing"):
            if field not in entry:
                continue
            multiline_array = tomlkit.array()
            multiline_array.multiline(True)
            # set() drops duplicates, sorted() gives a stable order in the file.
            for element in sorted(set(entry[field])):
                multiline_array.append(element)
            table[field] = multiline_array
        result[name] = table
    return result


def write_readme(progress_list: list[tuple[str, int]]) -> None:
    """Rewrite the progress badges in README.md (in the current directory).

    The first two lines of the file are never touched. For every later line
    that contains a language code from ``progress_list`` and a badge of the
    form ``![<number>%](...)``, the badge is replaced by one showing the new
    percentage. The file is then written back in full.

    Parameters:
        progress_list (list[tuple[str, int]]): A list of tuples containing
        language and progress percentage.

    Returns:
        None
    """
    badge_pattern = re.compile(r"\!\[(\d+(\.\d+)?)%\]\(.*\)")

    with open("README.md", encoding="utf-8") as readme:
        lines = readme.readlines()

    for index, text in enumerate(lines):
        if index < 2:
            continue
        for language, value in progress_list:
            if language not in text:
                continue
            found = badge_pattern.search(text)
            if found is None:
                continue
            # The replacement always starts from the line as it was read, so
            # when several languages match one line the last one wins.
            lines[index] = text.replace(
                found.group(0),
                f"![{value}%](https://geps.dev/progress/{value})",
            )

    with open("README.md", "w", encoding="utf-8") as readme:
        readme.writelines(lines)


def compare_files(
    default_file_path: str,
    file_paths: list[str],
    translation_status_file: str,
) -> list[tuple[str, int]]:
    """Compare the default properties file with each translation file.

    The files are compared line by line (line N of the default file against
    line N of the translation). A line counts as untranslated when its value
    is identical to the default value and its key is not listed in the
    language's ``ignore`` array of the translation status file. Keys found in
    ``ignore`` whose value does differ from the default are removed from that
    array. The translation status file is rewritten (normalized through
    ``convert_to_multiline``) before the function returns.

    Parameters:
        default_file_path (str): The path to the default properties file.
        file_paths (list[str]): Paths of the properties files to check. Each
            file name must look like ``messages_<language>.properties``.
        translation_status_file (str): Path to the TOML file holding the
            per-language ``ignore`` lists; it is read and then overwritten.

    Returns:
        list[tuple[str, int]]: A list of tuples containing
        language and progress percentage, without duplicates, ordered by
        descending percentage (ties ordered by language code).

    Raises:
        IndexError: If a file name does not contain ``messages_``.
        ZeroDivisionError: If the default file contains no translatable line
            and at least one non-English file is processed.
    """
    # Number of leading lines skipped in both files before comparing.
    header_lines = 5

    # Count the translatable entries (non-blank, non-comment lines). The
    # with-block makes sure the handle is closed again.
    with open(default_file_path, encoding="utf-8") as default_file:
        num_lines = sum(1 for line in default_file if line.strip() and not line.strip().startswith("#"))

    result_list = []

    with open(translation_status_file, encoding="utf-8") as f:
        sort_translation_status = tomlkit.parse(f.read())

    for file_path in file_paths:
        language = os.path.basename(file_path).split("messages_", 1)[1].split(".properties", 1)[0]

        # The English files are the reference and always count as complete.
        if "en_GB" in language or "en_US" in language:
            result_list.append(("en_GB", 100))
            result_list.append(("en_US", 100))
            continue

        if language not in sort_translation_status:
            sort_translation_status[language] = tomlkit.table()

        if (
            "ignore" not in sort_translation_status[language]
            or len(sort_translation_status[language].get("ignore", [])) < 1
        ):
            # Built with append(), as convert_to_multiline does, rather than
            # handing a Python list to tomlkit.array().
            default_ignore = tomlkit.array()
            default_ignore.append("language.direction")
            sort_translation_status[language]["ignore"] = default_ignore

        ignored_keys = sort_translation_status[language]["ignore"]

        fails = 0
        with open(default_file_path, encoding="utf-8") as default_file, open(file_path, encoding="utf-8") as file:
            for _ in range(header_lines):
                # A default of None keeps a very short file from raising StopIteration.
                next(default_file, None)
                if next(file, None) is None:
                    # The translation ends inside the header: nothing is translated.
                    fails = num_lines

            for line_num, (line_default, line_file) in enumerate(zip(default_file, file), start=header_lines + 1):
                # Skip blank lines and comments, using the same stripped test
                # as the num_lines count above.
                stripped_default = line_default.strip()
                if not stripped_default or stripped_default.startswith("#"):
                    continue

                try:
                    default_key, default_value = line_default.split("=", 1)
                    _, file_value = line_file.split("=", 1)
                except ValueError:
                    # Unpacking a line without "=" raises ValueError; such
                    # lines are skipped instead of aborting the whole run.
                    print(f"{language}: Line {line_num} is not a key=value pair and was skipped.")
                    continue

                key = default_key.strip()
                if default_value.strip() == file_value.strip():
                    if key not in ignored_keys:
                        print(f"{language}: Line {line_num} is missing the translation.")
                        fails += 1
                elif key in ignored_keys:
                    # The value is translated now, so it no longer needs to be ignored.
                    ignored_keys.remove(key)

        print(f"{language}: {fails} out of {num_lines} lines are not translated.")
        result_list.append((language, int((num_lines - fails) * 100 / num_lines)))

    translation_status = convert_to_multiline(sort_translation_status)
    with open(translation_status_file, "w", encoding="utf-8") as file:
        file.write(tomlkit.dumps(translation_status))

    # Drop duplicates (the English pair is added once per English file) and
    # order by percentage; the language code breaks ties so the order does not
    # depend on set iteration order.
    return sorted(set(result_list), key=lambda item: (-item[1], item[0]))


if __name__ == "__main__":
    # All paths are resolved relative to the current working directory.
    working_dir = os.getcwd()
    resources_dir = os.path.join(working_dir, "src", "main", "resources")
    status_toml = os.path.join(working_dir, "scripts", "translation_status.toml")

    english_reference = os.path.join(resources_dir, "messages_en_GB.properties")
    translation_files = glob.glob(os.path.join(resources_dir, "messages_*.properties"))

    # Compute the per-language progress, then write it into README.md.
    progress = compare_files(english_reference, translation_files, status_toml)
    write_readme(progress)
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`"""A script to update language progress status in README.md based on`
			`properties file comparison.`

			`This script compares default properties file with others in a directory to`
			`determine language progress.`
			`It then updates README.md based on provided progress list.`

			`Author: Ludy87`

			`Example:`
			`To use this script, simply run it from command line:`
			`$ python counter_translation.py`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`""" # noqa: D205`

Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`import glob`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`import os`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`import re`

Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`import tomlkit`
			`import tomlkit.toml_file`


			`def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:`
			`"""Converts 'ignore' and 'missing' arrays to multiline arrays and sorts the first-level keys of the TOML document.`
			`Enhances readability and consistency in the TOML file by ensuring arrays contain unique and sorted entries.`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`Parameters:`
			`data (tomlkit.TOMLDocument): The original TOML document containing the data.`

			`Returns:`
			`tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays.`
			`""" # noqa: D205`
			`sorted_data = tomlkit.document()`
			`for key in sorted(data.keys()):`
			`value = data[key]`
			`if isinstance(value, dict):`
			`new_table = tomlkit.table()`
			`for subkey in ("ignore", "missing"):`
			`if subkey in value:`
			`# Convert the list to a set to remove duplicates, sort it, and convert to multiline for readability`
			`unique_sorted_array = sorted(set(value[subkey]))`
			`array = tomlkit.array()`
			`array.multiline(True)`
			`for item in unique_sorted_array:`
			`array.append(item)`
			`new_table[subkey] = array`
			`sorted_data[key] = new_table`
			`else:`
			`# Add other types of data unchanged`
			`sorted_data[key] = value`
			`return sorted_data`


			`def write_readme(progress_list: list[tuple[str, int]]) -> None:`
			`"""Updates the progress status in the README.md file based`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`on the provided progress list.`

			`Parameters:`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`progress_list (list[tuple[str, int]]): A list of tuples containing`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`language and progress percentage.`

			`Returns:`
			`None`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`""" # noqa: D205`
			`with open("README.md", encoding="utf-8") as file:`
			`content = file.readlines()`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`for i, line in enumerate(content[2:], start=2):`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`for progress in progress_list:`
			`language, value = progress`
			`if language in line:`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):`
			`content[i] = line.replace(`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`match.group(0),`
			`f"![{value}%](https://geps.dev/progress/{value})",`
			`)`

			`with open("README.md", "w", encoding="utf-8") as file:`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`file.writelines(content)`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00

Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`def compare_files(default_file_path, file_paths, translation_status_file) -> list[tuple[str, int]]:`
			`"""Compares the default properties file with other`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`properties files in the directory.`

			`Parameters:`
			`default_file_path (str): The path to the default properties file.`
			`files_directory (str): The directory containing other properties files.`

			`Returns:`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`list[tuple[str, int]]: A list of tuples containing`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`language and progress percentage.`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`""" # noqa: D205`
			`num_lines = sum(`
			`1 for line in open(default_file_path, encoding="utf-8") if line.strip() and not line.strip().startswith("#")`
			`)`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00
			`result_list = []`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`sort_translation_status: tomlkit.TOMLDocument`

			`# read toml`
			`with open(translation_status_file, encoding="utf-8") as f:`
			`sort_translation_status = tomlkit.parse(f.read())`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00
			`for file_path in file_paths:`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`language = os.path.basename(file_path).split("messages_", 1)[1].split(".properties", 1)[0]`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00
			`fails = 0`
			`if "en_GB" in language or "en_US" in language:`
			`result_list.append(("en_GB", 100))`
			`result_list.append(("en_US", 100))`
			`continue`

Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`if language not in sort_translation_status:`
			`sort_translation_status[language] = tomlkit.table()`

			`if (`
			`"ignore" not in sort_translation_status[language]`
			`or len(sort_translation_status[language].get("ignore", [])) < 1`
			`):`
			`sort_translation_status[language]["ignore"] = tomlkit.array(["language.direction"])`

			`# if "missing" not in sort_translation_status[language]:`
			`# sort_translation_status[language]["missing"] = tomlkit.array()`
			`# elif "language.direction" in sort_translation_status[language]["missing"]:`
			`# sort_translation_status[language]["missing"].remove("language.direction")`

			`with open(default_file_path, encoding="utf-8") as default_file, open(file_path, encoding="utf-8") as file:`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`for _ in range(5):`
			`next(default_file)`
			`try:`
			`next(file)`
			`except StopIteration:`
			`fails = num_lines`

Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`for line_num, (line_default, line_file) in enumerate(zip(default_file, file), start=6):`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`try:`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`# Ignoring empty lines and lines start with #`
			`if line_default.strip() == "" or line_default.startswith("#"):`
			`continue`

			`default_key, default_value = line_default.split("=", 1)`
			`file_key, file_value = line_file.split("=", 1)`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`if (`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`default_value.strip() == file_value.strip()`
			`and default_key.strip() not in sort_translation_status[language]["ignore"]`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`):`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`print(f"{language}: Line {line_num} is missing the translation.")`
			`# if default_key.strip() not in sort_translation_status[language]["missing"]:`
			`# missing_array = tomlkit.array()`
			`# missing_array.append(default_key.strip())`
			`# missing_array.multiline(True)`
			`# sort_translation_status[language]["missing"].extend(missing_array)`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`fails += 1`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`# elif default_key.strip() in sort_translation_status[language]["ignore"]:`
			`# if default_key.strip() in sort_translation_status[language]["missing"]:`
			`# sort_translation_status[language]["missing"].remove(default_key.strip())`
			`if default_value.strip() != file_value.strip():`
			`# if default_key.strip() in sort_translation_status[language]["missing"]:`
			`# sort_translation_status[language]["missing"].remove(default_key.strip())`
			`if default_key.strip() in sort_translation_status[language]["ignore"]:`
			`sort_translation_status[language]["ignore"].remove(default_key.strip())`

Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`except IndexError:`
			`pass`

Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`print(f"{language}: {fails} out of {num_lines} lines are not translated.")`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`result_list.append(`
			`(`
			`language,`
			`int((num_lines - fails) * 100 / num_lines),`
			`)`
			`)`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`translation_status = convert_to_multiline(sort_translation_status)`
			`with open(translation_status_file, "w", encoding="utf-8") as file:`
			`file.write(tomlkit.dumps(translation_status))`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00
			`unique_data = list(set(result_list))`
			`unique_data.sort(key=lambda x: x[1], reverse=True)`

			`return unique_data`


			`if __name__ == "__main__":`
			`directory = os.path.join(os.getcwd(), "src", "main", "resources")`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00			`messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties"))`
Update progress of language (#1029) 2024-04-04 11:16:10 +02:00			`reference_file = os.path.join(directory, "messages_en_GB.properties")`
Handling Untranslatable Strings (#1133) 2024-04-28 00:26:12 +02:00
			`scripts_directory = os.path.join(os.getcwd(), "scripts")`
			`translation_state_file = os.path.join(scripts_directory, "translation_status.toml")`

			`write_readme(compare_files(reference_file, messages_file_paths, translation_state_file))`