File size: 5,473 Bytes
78163ee 1532744 78163ee 1532744 78163ee 1532744 78163ee 1532744 78163ee 1532744 78163ee 1532744 78163ee 1532744 78163ee 1532744 78163ee 1532744 78163ee 1532744 78163ee 1532744 78163ee 1532744 78163ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import gradio as gr
import re
from difflib import Differ
from src.translation_agent.utils import *
# Languages offered in the UI, keyed by the name shown to the user (written in
# that language) and mapped to the language's English name. Insertion order is
# the order in which the entries appear when the keys are listed.
LANGUAGES = {
    "English": "English",
    "Español": "Spanish",
    "Français": "French",
    "Deutsch": "German",
    "Italiano": "Italian",
    "Português": "Portuguese",
    "Русский": "Russian",
    "中文": "Chinese",
    "日本語": "Japanese",
    "한국어": "Korean",
    "العربية": "Arabic",
    "हिन्दी": "Hindi",
}
def diff_texts(text1, text2, lang):
    """Diff two texts into ``(text, label)`` pairs for a highlighted-text view.

    Args:
        text1: The original text.
        text2: The revised text.
        lang: Language name as selected in the UI. ``'中文'`` is compared
            character by character; every other value is compared by
            whitespace-delimited tokens.

    Returns:
        A list of ``(text, label)`` tuples in diff order, where ``label`` is
        ``"+"`` for text only in ``text2``, ``"-"`` for text only in
        ``text1`` and ``None`` for text common to both.
    """
    d = Differ()
    ic(lang)
    if lang == '中文':
        # Iterating a string yields its characters, so this is a
        # character-level comparison.
        seq1, seq2 = text1, text2
    else:
        # Alternating runs of non-whitespace and whitespace: words are the
        # unit of comparison and the spacing between them is kept verbatim.
        seq1 = re.findall(r'\S+|\s+', text1)
        seq2 = re.findall(r'\S+|\s+', text2)

    # Differ prefixes every item with a two-character code. Items coded "? "
    # are intraline hint lines made of alignment markers; they are part of
    # neither input, so they must not be emitted as diff text.
    return [
        (token[2:], token[0] if token[0] in ("+", "-") else None)
        for token in d.compare(seq1, seq2)
        if token[0] in ("+", "-", " ")
    ]
def translate_text(source_lang, target_lang, source_text, country, max_tokens=MAX_TOKENS_PER_CHUNK):
    """Translate ``source_text`` in three stages, yielding after each stage.

    This is a generator. Each yielded value is a 4-tuple
    ``(initial_translation, reflection, diff, final_translation)`` in which
    the stages that have not run yet are ``None``:

    1. after the initial translation,
    2. after the reflection on that translation,
    3. after the improved translation, together with the diff (built by
       ``diff_texts``) between the initial and the improved translation.

    Args:
        source_lang: Source language, forwarded to the translation helpers.
        target_lang: Target language, forwarded to the translation helpers
            and to ``diff_texts``.
        source_text: The text to translate.
        country: Forwarded to the reflection helper.
        max_tokens: Texts with fewer tokens than this are handled as a single
            chunk; otherwise the text is split and handled chunk by chunk.
    """
    num_tokens_in_text = num_tokens_in_string(source_text)
    ic(num_tokens_in_text)

    if num_tokens_in_text < max_tokens:
        ic("Translating text as single chunk")
        # Note: if a helper called here were itself a generator, it would
        # have to be consumed with `yield from helper(...)`.
        draft = one_chunk_initial_translation(source_lang, target_lang, source_text)
        yield draft, None, None, None

        critique = one_chunk_reflect_on_translation(
            source_lang, target_lang, source_text, draft, country
        )
        yield draft, critique, None, None

        improved = one_chunk_improve_translation(
            source_lang, target_lang, source_text, draft, critique
        )
        yield draft, critique, diff_texts(draft, improved, target_lang), improved
        return

    ic("Translating text as multiple chunks")
    token_size = calculate_chunk_size(
        token_count=num_tokens_in_text, token_limit=max_tokens
    )
    ic(token_size)

    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        model_name="gpt-4",
        chunk_size=token_size,
        chunk_overlap=0,
    )
    source_chunks = splitter.split_text(source_text)

    # Stage 1: initial translation, one result per source chunk.
    translation_1_chunks = multichunk_initial_translation(
        source_lang, target_lang, source_chunks
    )
    ic(translation_1_chunks)
    draft = "".join(translation_1_chunks)
    yield draft, None, None, None

    # Stage 2: reflection on each translated chunk.
    reflection_chunks = multichunk_reflect_on_translation(
        source_lang, target_lang, source_chunks, translation_1_chunks, country
    )
    ic(reflection_chunks)
    critique = "".join(reflection_chunks)
    yield draft, critique, None, None

    # Stage 3: improved translation, then the diff against the draft.
    translation_2_chunks = multichunk_improve_translation(
        source_lang, target_lang, source_chunks, translation_1_chunks, reflection_chunks
    )
    ic(translation_2_chunks)
    improved = "".join(translation_2_chunks)
    yield draft, critique, diff_texts(draft, improved, target_lang), improved
def update_ui(translation_1, reflection, translation_diff, translation_2=None):
    """Wrap the given values in Gradio update payloads for the result widgets.

    Args:
        translation_1: Value for the initial-translation component.
        reflection: Value for the reflection component.
        translation_diff: Value for the comparison component.
        translation_2: Optional value for the final-translation component.
            When omitted, that component receives an empty update.

    Returns:
        A 4-tuple of ``gr.update`` payloads, ordered as initial translation,
        reflection, comparison, final translation.
    """
    # Callers wire in only three inputs. Without a parameter of its own, the
    # name `translation_2` would resolve to the module-level Textbox component
    # and that object would be sent as the textbox value. An update with no
    # properties leaves the final-translation component unchanged instead.
    if translation_2 is None:
        final_update = gr.update()
    else:
        final_update = gr.update(value=translation_2)
    return (
        gr.update(value=translation_1),
        gr.update(value=reflection),
        gr.update(value=translation_diff),
        final_update,
    )
# Page layout and event wiring for the translation demo.
with gr.Blocks() as demo:
    gr.Markdown("# Andrew Ng's Translation Agent ")

    # Language selection and target-country input.
    with gr.Row():
        source_lang = gr.Dropdown(
            choices=list(LANGUAGES.keys()),
            value='English',
            label="Source Language",
        )
        target_lang = gr.Dropdown(
            choices=list(LANGUAGES.keys()),
            value='中文',
            label="Target Language",
        )
        country = gr.Textbox(label="Country (for target language)")

    source_text = gr.Textbox(
        label="Source Text",
        lines=5,
        show_copy_button=True,
    )
    btn = gr.Button("Translate")

    # Intermediate results: first draft and the reflection on it.
    with gr.Row():
        translation_1 = gr.Textbox(
            label="Initial Translation",
            lines=3,
            show_copy_button=True,
        )
        reflection = gr.Textbox(
            label="Reflection",
            lines=3,
            show_copy_button=True,
        )

    translation_diff = gr.HighlightedText(
        label="Comparison",
        combine_adjacent=True,
        show_legend=True,
        color_map={
            "+": "green",  # added text is shown in green
            "-": "red",  # removed text is shown in red
            "~": "yellow",  # modified text is shown in yellow
        },
    )
    translation_2 = gr.Textbox(
        label="Final Translation",
        lines=5,
        show_copy_button=True,
    )

    # Clicking the button starts the staged translation, which fills the four
    # result components as it yields.
    btn.click(
        translate_text,
        inputs=[source_lang, target_lang, source_text, country],
        outputs=[translation_1, reflection, translation_diff, translation_2],
        queue=True,
    )
    # A second handler on the same click feeds the current result values
    # through update_ui back into the result components.
    btn.click(
        update_ui,
        inputs=[translation_1, reflection, translation_diff],
        outputs=[translation_1, reflection, translation_diff, translation_2],
        queue=True,
    )

demo.launch()
|