|
import requests |
|
import time |
|
import os |
|
from datetime import datetime |
|
import gradio as gr |
|
|
|
from modules.utils.paths import TRANSLATION_OUTPUT_DIR, DEFAULT_PARAMETERS_CONFIG_PATH |
|
from modules.utils.subtitle_manager import * |
|
from modules.utils.files_manager import load_yaml, save_yaml |
|
|
|
""" |
|
This is written with reference to the DeepL API documentation. |
|
If you want to know the information of the DeepL API, see here: https://www.deepl.com/docs-api/documents |
|
""" |
|
|
|
# Target languages: display name -> code sent as `target_lang` in the
# translate request. Insertion order is kept as-is.
DEEPL_AVAILABLE_TARGET_LANGS = {
    "Bulgarian": "BG",
    "Czech": "CS",
    "Danish": "DA",
    "German": "DE",
    "Greek": "EL",
    "English": "EN",
    "English (British)": "EN-GB",
    "English (American)": "EN-US",
    "Spanish": "ES",
    "Estonian": "ET",
    "Finnish": "FI",
    "French": "FR",
    "Hungarian": "HU",
    "Indonesian": "ID",
    "Italian": "IT",
    "Japanese": "JA",
    "Korean": "KO",
    "Lithuanian": "LT",
    "Latvian": "LV",
    "Norwegian (Bokmål)": "NB",
    "Dutch": "NL",
    "Polish": "PL",
    "Portuguese": "PT",
    "Portuguese (Brazilian)": "PT-BR",
    "Portuguese (all Portuguese varieties excluding Brazilian Portuguese)": "PT-PT",
    "Romanian": "RO",
    "Russian": "RU",
    "Slovak": "SK",
    "Slovenian": "SL",
    "Swedish": "SV",
    "Turkish": "TR",
    "Ukrainian": "UK",
    "Chinese (simplified)": "ZH",
}
|
|
|
# Source languages: display name -> code sent as `source_lang` in the
# translate request. The "Automatic Detection" entry maps to None.
DEEPL_AVAILABLE_SOURCE_LANGS = {
    "Automatic Detection": None,
    "Bulgarian": "BG",
    "Czech": "CS",
    "Danish": "DA",
    "German": "DE",
    "Greek": "EL",
    "English": "EN",
    "Spanish": "ES",
    "Estonian": "ET",
    "Finnish": "FI",
    "French": "FR",
    "Hungarian": "HU",
    "Indonesian": "ID",
    "Italian": "IT",
    "Japanese": "JA",
    "Korean": "KO",
    "Lithuanian": "LT",
    "Latvian": "LV",
    "Norwegian (Bokmål)": "NB",
    "Dutch": "NL",
    "Polish": "PL",
    "Portuguese (all Portuguese varieties mixed)": "PT",
    "Romanian": "RO",
    "Russian": "RU",
    "Slovak": "SK",
    "Slovenian": "SL",
    "Swedish": "SV",
    "Turkish": "TR",
    "Ukrainian": "UK",
    "Chinese": "ZH",
}
|
|
|
|
|
class DeepLAPI:
    """Translate .srt / .vtt subtitle files through the DeepL HTTP API.

    Attributes
    ----------
    api_interval: int
        Seconds to sleep after every successful API request.
    max_text_batch_size: int
        Maximum number of subtitle sentences sent in a single request.
    available_target_langs: dict
        Display name -> DeepL code for target languages.
    available_source_langs: dict
        Display name -> DeepL code for source languages.
    output_dir: str
        Directory the translated subtitle files are written to.
    """

    # Seconds to wait for the DeepL server before giving up on a request.
    # Class-level so it is available even when __init__ is overridden.
    request_timeout = 60

    def __init__(self,
                 output_dir: str = TRANSLATION_OUTPUT_DIR
                 ):
        self.api_interval = 1
        self.max_text_batch_size = 50
        self.available_target_langs = DEEPL_AVAILABLE_TARGET_LANGS
        self.available_source_langs = DEEPL_AVAILABLE_SOURCE_LANGS
        self.output_dir = output_dir

    def translate_deepl(self,
                        auth_key: str,
                        fileobjs: list,
                        source_lang: str,
                        target_lang: str,
                        is_pro: bool = False,
                        add_timestamp: bool = True,
                        progress=gr.Progress()) -> list:
        """
        Translate subtitle files using DeepL API

        Parameters
        ----------
        auth_key: str
            API Key for DeepL from gr.Textbox()
        fileobjs: list
            List of subtitle files (.srt or .vtt) to translate from gr.Files()
        source_lang: str
            Source language of the files to translate from gr.Dropdown()
        target_lang: str
            Target language of the files to translate from gr.Dropdown()
        is_pro: bool
            Boolean value from gr.Checkbox() that tells whether the key belongs to a pro user.
        add_timestamp: bool
            Boolean value from gr.Checkbox() that determines whether to add a timestamp at the end of the filename.
        progress: gr.Progress
            Indicator to show progress directly in gradio.

        Returns
        ----------
        A List of
        String to return to gr.Textbox()
        Files to return to gr.Files()

        Raises
        ----------
        ValueError
            If any input file is not a .srt or .vtt file, or if a language is not supported.
        requests.HTTPError
            If the DeepL server answers with an error status.
        """
        # Nothing uploaded: behave like an empty list instead of crashing.
        fileobjs = fileobjs or []
        if fileobjs and isinstance(fileobjs[0], gr.utils.NamedString):
            fileobjs = [fileobj.name for fileobj in fileobjs]

        self.cache_parameters(
            api_key=auth_key,
            is_pro=is_pro,
            source_lang=source_lang,
            target_lang=target_lang,
            add_timestamp=add_timestamp
        )

        # Extension -> (parser, serializer). Matched case-insensitively.
        handlers = {
            ".srt": (parse_srt, get_serialized_srt),
            ".vtt": (parse_vtt, get_serialized_vtt),
        }

        # Fail fast, before any API request is made: previously an unsupported
        # file either raised NameError or silently reused the previous file's
        # parsed content.
        unsupported = [path for path in fileobjs
                       if os.path.splitext(path)[1].lower() not in handlers]
        if unsupported:
            raise ValueError(f"Unsupported subtitle file(s): {unsupported}. "
                             f"Only .srt and .vtt files can be translated.")

        # A list (not a dict keyed by name) so that files sharing a base name,
        # e.g. "a.srt" and "a.vtt", do not overwrite each other's entry.
        files_info = []
        batch_size = self.max_text_batch_size
        for file_path in fileobjs:
            file_name, file_ext = os.path.splitext(os.path.basename(file_path))
            parse_subtitle, serialize_subtitle = handlers[file_ext.lower()]
            parsed_dicts = parse_subtitle(file_path=file_path)

            for batch_start in range(0, len(parsed_dicts), batch_size):
                batch_end = min(batch_start + batch_size, len(parsed_dicts))
                sentences_to_translate = [dic["sentence"] for dic in parsed_dicts[batch_start:batch_end]]
                translated_texts = self.request_deepl_translate(auth_key, sentences_to_translate, source_lang,
                                                                target_lang, is_pro)
                # Write each translation back over the sentence it came from.
                for i, translated_text in enumerate(translated_texts):
                    parsed_dicts[batch_start + i]["sentence"] = translated_text["text"]
                progress(batch_end / len(parsed_dicts), desc="Translating...")

            subtitle = serialize_subtitle(parsed_dicts)

            if add_timestamp:
                timestamp = datetime.now().strftime("%m%d%H%M%S")
                file_name += f"-{timestamp}"

            output_path = os.path.join(self.output_dir, f"{file_name}{file_ext}")
            write_file(subtitle, output_path)

            files_info.append({"name": file_name, "subtitle": subtitle, "path": output_path})

        total_result = ''.join(
            f'------------------------------------\n{info["name"]}\n\n{info["subtitle"]}'
            for info in files_info
        )
        gr_str = f"Done! Subtitle is in the outputs/translation folder.\n\n{total_result}"

        output_file_paths = [info["path"] for info in files_info]
        return [gr_str, output_file_paths]

    def request_deepl_translate(self,
                                auth_key: str,
                                text: list,
                                source_lang: str,
                                target_lang: str,
                                is_pro: bool = False):
        """
        Request API response to DeepL server

        Parameters
        ----------
        auth_key: str
            API Key for DeepL.
        text: list
            Sentences to translate in this request.
        source_lang: str
            Key of DEEPL_AVAILABLE_SOURCE_LANGS (display name, not the code).
        target_lang: str
            Key of DEEPL_AVAILABLE_TARGET_LANGS (display name, not the code).
        is_pro: bool
            Use the pro endpoint (api.deepl.com) instead of the free one (api-free.deepl.com).

        Returns
        ----------
        The "translations" entry of the JSON response; the caller reads the "text" key of each item.

        Raises
        ----------
        ValueError
            If source_lang or target_lang is not a supported display name.
        requests.HTTPError
            If the server answers with a 4xx/5xx status.
        """
        if source_lang not in DEEPL_AVAILABLE_SOURCE_LANGS:
            raise ValueError(f"Source language {source_lang} is not supported. "
                             f"Use one of {list(DEEPL_AVAILABLE_SOURCE_LANGS.keys())}")
        if target_lang not in DEEPL_AVAILABLE_TARGET_LANGS:
            raise ValueError(f"Target language {target_lang} is not supported. "
                             f"Use one of {list(DEEPL_AVAILABLE_TARGET_LANGS.keys())}")

        url = 'https://api.deepl.com/v2/translate' if is_pro else 'https://api-free.deepl.com/v2/translate'
        headers = {
            'Authorization': f'DeepL-Auth-Key {auth_key}'
        }
        data = {
            'text': text,
            'source_lang': DEEPL_AVAILABLE_SOURCE_LANGS[source_lang],
            'target_lang': DEEPL_AVAILABLE_TARGET_LANGS[target_lang]
        }
        response = requests.post(url, headers=headers, data=data, timeout=self.request_timeout)
        # Surface HTTP errors as HTTPError instead of an opaque KeyError /
        # JSON decode error further down.
        response.raise_for_status()
        translations = response.json()["translations"]
        # Pause between consecutive requests.
        time.sleep(self.api_interval)
        return translations

    @staticmethod
    def cache_parameters(api_key: str,
                         is_pro: bool,
                         source_lang: str,
                         target_lang: str,
                         add_timestamp: bool):
        """
        Save the given DeepL settings into the default parameters config file

        The values are stored under ["translation"]["deepl"] and
        ["translation"]["add_timestamp"] of the YAML loaded from
        DEFAULT_PARAMETERS_CONFIG_PATH, which is then saved back to the same path.
        """
        cached_params = load_yaml(DEFAULT_PARAMETERS_CONFIG_PATH)
        # NOTE(review): the API key is written into the config file as-is;
        # presumably unencrypted - confirm this is acceptable.
        cached_params["translation"]["deepl"] = {
            "api_key": api_key,
            "is_pro": is_pro,
            "source_lang": source_lang,
            "target_lang": target_lang
        }
        cached_params["translation"]["add_timestamp"] = add_timestamp
        save_yaml(cached_params, DEFAULT_PARAMETERS_CONFIG_PATH)
|
|