Spaces:

chi-vi
/

hirashiba-mt-jp-names

Sleeping

App Files Files Community

hirashiba-mt-jp-names / app.py

Moleys

Update app.py

3b1df6c verified 4 months ago

raw

history blame contribute delete

2.06 kB

	import torch
	from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
	import gradio as gr
	import opencc

	# Hugging Face Hub id of the seq2seq checkpoint this app serves.
	model_name = "chi-vi/hirashiba-mt-jp-names"

	# Use the GPU when torch reports one as available, otherwise the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Load the tokenizer and the model, then move the model onto `device`.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
	model = model.to(device)

	# OpenCC converter with the 't2s' configuration
	# (Traditional Chinese -> Simplified Chinese).
	converter = opencc.OpenCC('t2s')

	def translate_text(input_text):
	    """Translate ``input_text`` line by line with the seq2seq model.

	    The text is split on ``'\\n'`` and each line is processed on its own:
	    it is stripped, run through the module-level OpenCC ``converter``,
	    tokenized, passed to ``model.generate`` and decoded. Lines that are
	    empty after stripping are kept as empty strings, so the output has
	    exactly one line per input line.

	    Args:
	        input_text: The text to translate; may span several lines.

	    Returns:
	        The translated lines joined with ``'\\n'``. The first character of
	        every translated line is upper-cased; the remaining characters are
	        left exactly as the model produced them.
	    """
	    translated_lines = []

	    for line in input_text.split('\n'):
	        raw_text = line.strip()
	        if not raw_text:
	            # Keep blank lines so output lines stay aligned with the input.
	            translated_lines.append('')
	            continue

	        # Normalise the line with the OpenCC converter before tokenizing.
	        simplified_text = converter.convert(raw_text)

	        # Tokenize the single line and move the tensors to the model's device.
	        inputs = tokenizer(
	            simplified_text,
	            return_tensors="pt",
	            padding=True,
	            truncation=True,
	        ).to(device)

	        # Inference only, so gradient tracking is switched off.
	        with torch.no_grad():
	            output_tokens = model.generate(**inputs, max_length=512)

	        decoded = tokenizer.decode(output_tokens[0], skip_special_tokens=True)

	        # Upper-case only the first character. str.capitalize() would also
	        # lower-case the rest of the line and wipe out any capitals the
	        # model emitted further in (e.g. the second word of a name).
	        translated_lines.append(decoded[:1].upper() + decoded[1:])

	    return '\n'.join(translated_lines)

	if __name__ == '__main__':
	    # Assemble the Gradio page: a heading, then one row holding a single
	    # column with the input box, the trigger button and the output box.
	    with gr.Blocks() as app:
	        gr.Markdown('## Japanese names (in Simplified Chinese) to Romaji')

	        with gr.Row(), gr.Column(scale=1):
	            input_text = gr.Textbox(
	                label='Input Text',
	                lines=5,
	                placeholder='Enter text here...',
	            )
	            translate_button = gr.Button('Translate')
	            output_text = gr.Textbox(
	                label='Output Text',
	                lines=5,
	                interactive=False,
	            )

	        # Clicking the button runs translate_text on the input box contents
	        # and writes the result into the read-only output box.
	        translate_button.click(fn=translate_text, inputs=input_text, outputs=output_text)

	    # Start the web server for the assembled app.
	    app.launch()