Spaces:
Runtime error
Runtime error
File size: 1,704 Bytes
08b7f89 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import os
import re
def parse_markdown_files(file_paths):
    """
    Parses markdown files to extract content for translation.

    Each file is read in full as UTF-8 text.

    :param file_paths: List of markdown file paths. A single path (``str`` or
        ``os.PathLike``) is also accepted and treated as a one-element list.
    :return: List of dictionaries containing filename and content, in the
        same order as ``file_paths``
    :raises OSError: If a file cannot be opened or read
    :raises UnicodeDecodeError: If a file is not valid UTF-8
    """
    # A bare string is itself iterable; without this guard each character
    # would be opened as if it were a separate path.
    if isinstance(file_paths, (str, os.PathLike)):
        file_paths = [file_paths]

    parsed_files = []
    for path in file_paths:
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
        parsed_files.append({'filename': path, 'content': content})
    return parsed_files
# Fenced code blocks (``` ... ```), which may span several lines.
_CODE_BLOCK_PATTERN = re.compile(r'```.*?```', re.DOTALL)
# HTML comments and tag-shaped spans (including autolinks like <http://...>).
# Requiring a letter after '<' keeps prose such as "a < b" intact.
_HTML_PATTERN = re.compile(r'<!--.*?-->|</?[A-Za-z][^<>]*>', re.DOTALL)
# Inline links/images: [text](url) or ![alt](url). The bracket part may not
# contain ']' so the match cannot start at an earlier, unrelated '['.
_LINK_PATTERN = re.compile(r'!?\[([^\]\n]*)\]\([^)\n]*\)')
# One or more blank (whitespace-only) lines separate paragraphs.
_PARAGRAPH_BREAK_PATTERN = re.compile(r'\n\s*\n')


def extract_translatable_text(content):
    """
    Extracts translatable text from markdown content.

    Fenced code blocks and HTML tags/comments are removed. Inline links and
    images are reduced to their visible text (the URL is dropped, the link
    text or alt text is kept). The remainder is split on blank lines.
    Inline code spans (single backticks) are not removed.

    :param content: Markdown content
    :return: List of translatable text segments (stripped, non-empty
        paragraphs and headers, in document order)
    """
    # Order matters: strip code first so markup inside code samples is not
    # interpreted as HTML or links.
    content = _CODE_BLOCK_PATTERN.sub('', content)
    content = _HTML_PATTERN.sub('', content)
    # Keep the human-readable part of each link; only the target is noise.
    content = _LINK_PATTERN.sub(r'\1', content)

    # Extract paragraphs and headers
    paragraphs = _PARAGRAPH_BREAK_PATTERN.split(content)
    return [para.strip() for para in paragraphs if para.strip()]
def save_translated_files(translated_files):
    """
    Saves translated files to the local machine.

    Each file is written as UTF-8 text, overwriting any existing file at
    that path. Missing parent directories are created.

    :param translated_files: List of translated file data; each item is a
        dictionary with a 'filename' (destination path) and 'content'
        (text to write)
    :raises OSError: If a directory cannot be created or a file cannot be
        written
    """
    for entry in translated_files:
        filename = entry['filename']
        directory = os.path.dirname(filename)
        # dirname is '' for a bare filename (current directory); calling
        # makedirs('') would raise FileNotFoundError, so skip it.
        if directory:
            # exist_ok avoids a race between checking and creating.
            os.makedirs(directory, exist_ok=True)
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(entry['content'])
|