Spaces:

Zeref02210217-cst
/

English-Dzongkha_Translation

Sleeping

Update app.py

25f75fb verified 6 months ago

1.74 kB

	import streamlit as st
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

	# Load the model and tokenizer.
	# Streamlit re-executes this script from the top on every widget interaction,
	# so the loading is wrapped in a cached function: the checkpoint is read once
	# per server process and the same objects are reused on later reruns.
	model_name = "Zeref02210217-cst/en_to_dzo_nllb_mul_mt_nlp_m4"


	@st.cache_resource
	def _load_translator(checkpoint):
	    """Return the ``(tokenizer, model)`` pair for the given checkpoint name."""
	    # The slow tokenizer is requested on purpose; the original script
	    # disabled the fast one "to avoid errors".
	    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=False)
	    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
	    return loaded_tokenizer, loaded_model


	tokenizer, model = _load_translator(model_name)

	# Streamlit app: page header.
	st.title("English to Dzongkha Translation")
	st.write("This app uses the NLLB model for translating English text to Dzongkha.")

	# Input text box
	input_text = st.text_area("Enter English text here:")

	# Token limits. The input is truncated to MAX_INPUT_TOKENS; the output limit
	# matches it so that a long input is not cut off after a few words (the
	# previous output cap of 30 tokens silently truncated longer translations).
	MAX_INPUT_TOKENS = 512
	MAX_OUTPUT_TOKENS = 512

	# Translate only when the button is pressed on this rerun.
	if st.button("Translate"):
	    if input_text.strip():
	        with st.spinner("Translating..."):
	            # Tokenize the input text
	            inputs = tokenizer(
	                input_text,
	                return_tensors="pt",
	                padding=True,
	                truncation=True,
	                max_length=MAX_INPUT_TOKENS,
	            )

	            # ID of the "dzo_Tibt" language token, used as the forced first
	            # generated token so the model produces Dzongkha.
	            bos_token_id = tokenizer.convert_tokens_to_ids("dzo_Tibt")

	            # Generate translation using the model
	            translated_tokens = model.generate(
	                **inputs,
	                forced_bos_token_id=bos_token_id,
	                max_length=MAX_OUTPUT_TOKENS,
	            )

	            # Decode the first (only) sequence, dropping special tokens
	            # such as the language tag.
	            translation = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)

	        # Display the translation once the spinner has closed.
	        st.success("Translation complete!")
	        st.text_area("Translated Dzongkha Text:", translation, height=200)
	    else:
	        # Empty or whitespace-only input: nothing to translate.
	        st.warning("Please enter some text to translate!")