import os
from pathlib import Path

import requests

from .model import Languages, Summary, TranslationDoc

URL = "https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1"


def get_github_repo_files(timeout: float = 30.0) -> list[str]:
    """
    List the file paths under ``docs`` in the repository tree served at ``URL``.

    Args:
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error, so a stalled connection cannot hang
            the caller indefinitely.

    Returns:
        The ``path`` of every tree entry whose ``type`` is ``"blob"`` and
        whose path starts with ``"docs"``. If the JSON payload has no
        ``"tree"`` key (e.g. an error payload), an empty list is returned.
    """
    response = requests.get(URL, timeout=timeout)
    payload = response.json()
    # NOTE(review): the GitHub trees endpoint may flag large listings as
    # truncated; that flag is not inspected here - confirm whether it matters.
    return [
        entry["path"]
        for entry in payload.get("tree", [])
        if entry["type"] == "blob" and entry["path"].startswith("docs")
    ]


def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
    """
    Build a Markdown summary table and collect the first missing documents.

    Both the table and the list are also printed to stdout.

    Args:
        summary: Source of the counters shown in the table and of the
            missing-translation entries.
        table_size: Passed to ``summary.first_missing_translation_files`` to
            select how many entries are requested.

    Returns:
        A tuple ``(markdown_table, original_file_paths)``.
    """
    # The table rows must start at column 0 so the Markdown renders as a table.
    report_table = f"""
| Item | Count | Percentage |
|------|-------|------------|
| 📂 HuggingFaces docs | {summary.files_analyzed} | - |
| 🪹 Missing translations | {summary.files_missing_translation} | {summary.percentage_missing_translation:.2f}% |
"""
    print(report_table)
    first_missing_docs = [
        doc.original_file
        for doc in summary.first_missing_translation_files(table_size)
    ]
    print(first_missing_docs)
    return report_table, first_missing_docs


def report(target_lang: str, top_k: int = 1) -> tuple[str, list[str]]:
    """
    Generate a translation-coverage report for ``target_lang``.

    Every ``.md`` file under ``docs/source/en`` in the repository listing is
    paired with the path its translation would have under
    ``docs/source/<lang>``; whether that path exists in the listing is
    recorded in a ``Summary``.

    Args:
        target_lang: Member name looked up in ``Languages``.
        top_k: Forwarded to ``retrieve`` as its ``table_size``.

    Returns:
        The ``(markdown_table, original_file_paths)`` tuple from ``retrieve``.

    Raises:
        KeyError: If ``target_lang`` is not a member name of ``Languages``.
    """
    repo_files = get_github_repo_files()
    # Build the lookup set once; membership is tested for every English doc.
    known_paths = set(repo_files)
    base_docs_path = Path("docs/source")
    en_docs_path = Path("docs/source/en")
    lang = Languages[target_lang]
    summary = Summary(lang=lang.value)

    for file in repo_files:
        if not file.endswith(".md"):
            continue
        try:
            file_relative_path = Path(file).relative_to(en_docs_path)
        except ValueError:
            # Not located under the English docs directory.
            continue
        # Repository paths always use forward slashes, so build the candidate
        # in POSIX form; an OS-specific join would never match on Windows.
        translated_path = (
            base_docs_path / lang.value / file_relative_path
        ).as_posix()
        summary.append_file(
            TranslationDoc(
                translation_lang=lang.value,
                original_file=file,
                translation_file=translated_path,
                translation_exists=translated_path in known_paths,
            )
        )

    return retrieve(summary, top_k)