Spaces:
Sleeping
Sleeping
File size: 2,422 Bytes
83c1b5b 851606d e3f0b46 83c1b5b 3e35f55 83c1b5b 3e35f55 83c1b5b 851606d 83c1b5b 851606d 6608f42 851606d 6608f42 9f43ee3 851606d 9f43ee3 851606d 83c1b5b 851606d 3e35f55 020eac5 851606d dfb8d4c 020eac5 9f43ee3 020eac5 dfb8d4c 83c1b5b 851606d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import streamlit as st
import PyPDF2
import PyPDF2 as PDF
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
_MODEL_NAME = "facebook/mbart-large-50-one-to-many-mmt"

# Session-state key holding the text the user last asked to translate.
_REQUESTED_TEXT_KEY = "translation_requested_text"


@st.cache_resource
def _load_model_and_tokenizer():
    """Load the mBART-50 model and its English-source tokenizer once.

    Streamlit re-executes the whole script on every widget interaction;
    caching the objects keeps the checkpoint from being reloaded on each
    rerun.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model = MBartForConditionalGeneration.from_pretrained(_MODEL_NAME)
    tokenizer = MBart50TokenizerFast.from_pretrained(_MODEL_NAME, src_lang="en_XX")
    return model, tokenizer


def main():
    """Render the translation app.

    The user either types text or uploads a PDF (whose extracted text is
    displayed), then presses "Translate" to see the translated text.
    """
    st.title("Translation App")

    model, tokenizer = _load_model_and_tokenizer()

    # Input option: Text area or file upload
    input_option = st.radio("Select Input Option", ("Text", "PDF"))
    if input_option == "Text":
        source_text = st.text_area("Enter text to translate", "")
    else:
        pdf_file = st.file_uploader("Upload PDF file", type=["pdf"])
        if pdf_file is None:
            # Nothing to translate until a file has been uploaded.
            return
        source_text = extract_text_from_pdf(pdf_file)
        st.write("Extracted Text from PDF:")
        st.write(source_text)

    # st.button is True only for the single rerun triggered by the click.
    # Remember which text was requested so that later reruns (for example
    # when the user changes a widget rendered during translation) keep
    # showing the translation step instead of dropping it.
    if st.button("Translate"):
        st.session_state[_REQUESTED_TEXT_KEY] = source_text

    requested = (
        _REQUESTED_TEXT_KEY in st.session_state
        and st.session_state[_REQUESTED_TEXT_KEY] == source_text
    )
    if not requested:
        # Either nothing was requested yet, or the input changed since the
        # last click; wait for the user to press "Translate" again.
        return

    translated_text = translate_text(source_text, model, tokenizer)
    st.write("Translated Text:")
    st.write(translated_text)
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: A path or binary file-like object accepted by the PyPDF2
            reader (for example the object returned by
            ``st.file_uploader``).

    Returns:
        The text of all pages joined in page order, with no separator
        between pages.
    """
    if hasattr(PyPDF2, "PdfReader"):
        # Current API. The legacy PdfFileReader / numPages / getPage /
        # extractText names were removed in PyPDF2 3.0 and raise there.
        reader = PyPDF2.PdfReader(pdf_file)
        page_texts = [page.extract_text() for page in reader.pages]
    else:
        # Old PyPDF2 releases that only ship the legacy camelCase API.
        reader = PyPDF2.PdfFileReader(pdf_file)
        page_texts = [
            reader.getPage(page_num).extractText()
            for page_num in range(reader.numPages)
        ]
    # Guard against a page yielding no text object at all.
    return "".join(text or "" for text in page_texts)
def translate_text(input_text, model, tokenizer):
    """Translate text into a language chosen from a selectbox.

    Renders a Streamlit selectbox offering Hindi, Tamil and Telugu, then
    generates the translation with the given model.

    Args:
        input_text: The text to translate.
        model: A seq2seq model exposing ``generate``.
        tokenizer: The matching tokenizer; must expose ``lang_code_to_id``
            and ``decode``.

    Returns:
        The decoded translation, or an empty string when ``input_text`` is
        empty or whitespace-only.
    """
    language_codes = {
        "Hindi": "hi_IN",
        "Tamil": "ta_IN",
        "Telugu": "te_IN",
    }
    translate_to = st.selectbox(
        "Select language to translate", tuple(language_codes)
    )

    # Nothing to translate: skip the model instead of decoding whatever it
    # would generate from an empty prompt.
    if not input_text or not input_text.strip():
        return ""

    # truncation=True caps the input at the tokenizer's maximum length so
    # long inputs do not overflow the model; text beyond that limit is
    # dropped.
    encoded = tokenizer(input_text, return_tensors="pt", truncation=True)

    target_lang = language_codes[translate_to]
    generated_tokens = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        # Forces the first generated token to be the target-language code.
        forced_bos_token_id=tokenizer.lang_code_to_id[target_lang],
    )
    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
# Script entry point: build the app only when this file is executed directly.
if __name__ == "__main__":
    main()
|