|
import gradio as gr |
|
import re |
|
from difflib import Differ |
|
from src.translation_agent.utils import * |
|
|
|
# Maps the language name as written in that language (used for the dropdown
# choices) to the English name of the same language. Insertion order is the
# order in which the choices are listed.
LANGUAGES = dict(
    (
        ('English', 'English'),
        ('Español', 'Spanish'),
        ('Français', 'French'),
        ('Deutsch', 'German'),
        ('Italiano', 'Italian'),
        ('Português', 'Portuguese'),
        ('Русский', 'Russian'),
        ('中文', 'Chinese'),
        ('日本語', 'Japanese'),
        ('한국어', 'Korean'),
        ('العربية', 'Arabic'),
        ('हिन्दी', 'Hindi'),
    )
)
|
|
|
|
|
def diff_texts(text1, text2, lang):
    """Diff two texts into ``(token, label)`` pairs for a highlighted view.

    Args:
        text1: Baseline text.
        text2: Revised text.
        lang: Language label of the texts. ``'中文'`` is compared character
            by character; every other value is compared on alternating runs
            of non-whitespace and whitespace, so the original spacing is
            preserved when the tokens are concatenated.

    Returns:
        A list of ``(token, label)`` tuples in reading order. ``label`` is
        ``"-"`` for a token present only in ``text1``, ``"+"`` for a token
        present only in ``text2`` and ``None`` for a token common to both.
    """
    # NOTE(review): only '中文' takes the character-level path; other scripts
    # written without spaces (e.g. '日本語') are still diffed per whitespace
    # run -- confirm whether that is intended.
    if lang == '中文':
        seq1, seq2 = text1, text2
    else:
        seq1 = re.findall(r'\S+|\s+', text1)
        seq2 = re.findall(r'\S+|\s+', text2)

    # Differ prefixes every entry with a two-character code. Besides "- ",
    # "+ " and "  " it also emits "? " guide lines that only contain marker
    # characters (such as "^") pointing at intra-token changes. Those are not
    # part of either text, so they are skipped instead of being displayed.
    return [
        (entry[2:], entry[0] if entry[0] in ("+", "-") else None)
        for entry in Differ().compare(seq1, seq2)
        if entry[0] in ("+", "-", " ")
    ]
|
|
|
|
|
def translate_text(
    source_lang,
    target_lang,
    source_text,
    country,
    max_tokens=MAX_TOKENS_PER_CHUNK,
):
    """Translate ``source_text`` in three stages, yielding after each stage.

    The stages are: initial translation, reflection on that translation, and
    an improved translation. Texts whose token count is below ``max_tokens``
    are handled as a single chunk; longer texts are split into chunks first
    and the per-chunk results are concatenated.

    Args:
        source_lang: Source language label, forwarded to the translation helpers.
        target_lang: Target language label, forwarded to the translation
            helpers and to ``diff_texts``.
        source_text: Text to translate.
        country: Forwarded to the reflection step only.
        max_tokens: Token threshold that selects single- vs multi-chunk mode.

    Yields:
        ``(translation_1, reflection, translation_diff, translation_2)``
        tuples. Entries that have not been computed yet are ``None``.
    """
    num_tokens_in_text = num_tokens_in_string(source_text)
    ic(num_tokens_in_text)

    if num_tokens_in_text < max_tokens:
        ic("Translating text as single chunk")

        draft = one_chunk_initial_translation(source_lang, target_lang, source_text)
        yield draft, None, None, None

        critique = one_chunk_reflect_on_translation(
            source_lang, target_lang, source_text, draft, country
        )
        yield draft, critique, None, None

        improved = one_chunk_improve_translation(
            source_lang, target_lang, source_text, draft, critique
        )
        changes = diff_texts(draft, improved, target_lang)
        yield draft, critique, changes, improved
        return

    # Multi-chunk path: split first, then run each stage over the chunk list.
    ic("Translating text as multiple chunks")

    token_size = calculate_chunk_size(
        token_count=num_tokens_in_text,
        token_limit=max_tokens,
    )
    ic(token_size)

    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        model_name="gpt-4",
        chunk_size=token_size,
        chunk_overlap=0,
    )
    pieces = splitter.split_text(source_text)

    translation_1_chunks = multichunk_initial_translation(source_lang, target_lang, pieces)
    ic(translation_1_chunks)
    draft = "".join(translation_1_chunks)
    yield draft, None, None, None

    reflection_chunks = multichunk_reflect_on_translation(
        source_lang, target_lang, pieces, translation_1_chunks, country
    )
    ic(reflection_chunks)
    critique = "".join(reflection_chunks)
    yield draft, critique, None, None

    translation_2_chunks = multichunk_improve_translation(
        source_lang, target_lang, pieces, translation_1_chunks, reflection_chunks
    )
    ic(translation_2_chunks)
    improved = "".join(translation_2_chunks)
    changes = diff_texts(draft, improved, target_lang)

    yield draft, critique, changes, improved
|
|
|
|
|
# Sentinel meaning "no final translation supplied"; distinct from None, which
# is a legitimate value for a not-yet-computed field.
_KEEP_CURRENT = object()


def update_ui(translation_1, reflection, translation_diff, translation_2=_KEEP_CURRENT):
    """Build Gradio updates for the four result components.

    Args:
        translation_1: Value for the initial-translation component.
        reflection: Value for the reflection component.
        translation_diff: Value for the comparison component.
        translation_2: Optional value for the final-translation component.
            When omitted, the fourth update carries no value, so that
            component keeps whatever it currently shows.

    Returns:
        A 4-tuple of ``gr.update(...)`` objects, in the order
        (initial translation, reflection, comparison, final translation).
    """
    # Previously the fourth update read a name that is not a parameter here
    # and therefore resolved to the module-level Textbox component itself.
    if translation_2 is _KEEP_CURRENT:
        final_update = gr.update()
    else:
        final_update = gr.update(value=translation_2)

    return (
        gr.update(value=translation_1),
        gr.update(value=reflection),
        gr.update(value=translation_diff),
        final_update,
    )
|
|
|
# Gradio layout and event wiring for the translation demo.
# NOTE(review): the nesting of the two gr.Row() groups is reconstructed from
# statement order; in particular, `source_text` is assumed to sit below the
# first row rather than inside it -- confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Andrew Ng's Translation Agent ")

    # Language selection and locale hint.
    with gr.Row():
        source_lang = gr.Dropdown(choices=list(LANGUAGES), value='English', label="Source Language")
        target_lang = gr.Dropdown(choices=list(LANGUAGES), value='中文', label="Target Language")
        country = gr.Textbox(label="Country (for target language)")
    source_text = gr.Textbox(label="Source Text", lines=5, show_copy_button=True)

    btn = gr.Button("Translate")

    # Intermediate results, side by side.
    with gr.Row():
        translation_1 = gr.Textbox(label="Initial Translation", lines=3, show_copy_button=True)
        reflection = gr.Textbox(label="Reflection", lines=3, show_copy_button=True)

    # Labels match the ones produced by diff_texts; "~" is kept in the map so
    # any such label still gets a colour.
    translation_diff = gr.HighlightedText(
        label="Comparison",
        combine_adjacent=True,
        show_legend=True,
        color_map={
            "+": "green",
            "-": "red",
            "~": "yellow",
        },
    )
    translation_2 = gr.Textbox(label="Final Translation", lines=5, show_copy_button=True)

    # translate_text is a generator: every tuple it yields is streamed to the
    # four outputs, so this single listener drives the whole UI. A second
    # click listener that echoed the current component values back through
    # update_ui was removed: it ran on the same click, re-posted stale values
    # and wrote a non-text object into the final-translation box.
    btn.click(
        translate_text,
        inputs=[source_lang, target_lang, source_text, country],
        outputs=[translation_1, reflection, translation_diff, translation_2],
        queue=True,
    )

demo.launch()
|
|