# Hugging Face Space file: app.py (author: Zeref02210217-cst)
# Commit message: "Update app.py"
# Commit: 25f75fb (verified)
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Load the model and tokenizer
model_name = "Zeref02210217-cst/en_to_dzo_nllb_mul_mt_nlp_m4"


@st.cache_resource
def _load_translation_resources(checkpoint):
    """Load the tokenizer and seq2seq model for ``checkpoint`` once.

    Streamlit re-runs this script from the top on every widget interaction.
    Without caching, each rerun (for example every click on a button) would
    load the tokenizer and model weights again. ``st.cache_resource`` keeps
    the loaded objects and hands the same instances back on later reruns.

    Args:
        checkpoint: Hugging Face Hub model id (or local path) to load.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Disable fast tokenizer to avoid errors
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=False)
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return loaded_tokenizer, loaded_model


# Module-level names used by the rest of the script.
tokenizer, model = _load_translation_resources(model_name)
# Page header: app title followed by a one-line description.
st.title("English to Dzongkha Translation")
app_description = (
    "This app uses the NLLB model for translating English text to Dzongkha."
)
st.write(app_description)

# Free-form text box holding the English source text to translate.
input_text = st.text_area(label="Enter English text here:")
# Translation handler: the body runs only on the rerun triggered by a click
# on the "Translate" button.
if st.button("Translate"):
    if input_text.strip():
        # Upper bound (in tokens) used for both the encoded input and the
        # generated output. The output cap used to be 30 tokens while the
        # input could be up to 512, so longer translations were silently
        # cut off; keeping both limits equal avoids that truncation.
        max_tokens = 512

        with st.spinner("Translating..."):
            # Tokenize the input text, truncating anything past the limit.
            inputs = tokenizer(
                input_text,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=max_tokens,
            )

            # Look up the id of the Dzongkha language-code token; it is
            # forced as the first generated token below.
            bos_token_id = tokenizer.convert_tokens_to_ids("dzo_Tibt")

            # Generate translation using the model
            translated_tokens = model.generate(
                **inputs,
                forced_bos_token_id=bos_token_id,  # Apply the forced BOS token for Dzongkha
                max_length=max_tokens,
            )

            # Decode the first (only) output sequence to a string, dropping
            # special tokens.
            translation = tokenizer.decode(
                translated_tokens[0], skip_special_tokens=True
            )

            # Display the translation
            st.success("Translation complete!")
            st.text_area("Translated Dzongkha Text:", translation, height=200)
    else:
        # Empty or whitespace-only input: nothing to translate.
        st.warning("Please enter some text to translate!")