puppala13 committed on
Commit
851606d
·
verified ·
1 Parent(s): 9f43ee3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -26
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import streamlit as st
 
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
  from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
4
 
@@ -9,43 +10,55 @@ def main():
9
  model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
10
  tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
11
 
12
- # Input text area
13
- input_text = st.text_area("Enter text to translate", "")
14
 
15
- # Translation buttons
16
- translate_hindi = st.button("Hindi")
17
- translate_tamil = st.button("Tamil")
18
- translate_telugu = st.button("Telugu")
 
 
 
 
 
 
 
 
 
19
 
20
- if translate_hindi:
21
- translated_text = translate_text(input_text, model, tokenizer, target_lang="hi_IN")
22
- st.write("Translated Text (Hindi):")
23
- st.write(translated_text)
 
24
 
25
- if translate_tamil:
26
- translated_text = translate_text(input_text, model, tokenizer, target_lang="ta_IN")
27
- st.write("Translated Text (Tamil):")
28
- st.write(translated_text)
 
 
 
29
 
30
- if translate_telugu:
31
- translated_text = translate_text(input_text, model, tokenizer, target_lang="te_IN")
32
- st.write("Translated Text (Telugu):")
33
- st.write(translated_text)
34
-
35
def translate_text(input_text, model, tokenizer, target_lang):
    """Translate ``input_text`` into ``target_lang`` with a seq2seq model.

    Args:
        input_text: Source text to translate.
        model: Model exposing ``generate(input_ids=..., forced_bos_token_id=...)``.
        tokenizer: Tokenizer that is callable on text, exposes a
            ``lang_code_to_id`` mapping and a ``decode`` method.
        target_lang: Language code used as a key of
            ``tokenizer.lang_code_to_id`` (e.g. ``"hi_IN"``).

    Returns:
        The decoded translation, or ``""`` when ``input_text`` is empty or
        only whitespace.

    Raises:
        KeyError: If ``target_lang`` is not in ``tokenizer.lang_code_to_id``.
    """
    # Nothing to translate: skip the model call instead of letting it
    # generate output for an empty prompt.
    if not input_text or not input_text.strip():
        return ""

    # Truncate so over-long input is cut to the tokenizer's configured
    # maximum instead of being passed to the model at full length.
    # NOTE(review): the truncated tail is not translated.
    input_ids = tokenizer(
        input_text, return_tensors="pt", truncation=True
    ).input_ids

    # Forcing the first generated token to the language code selects the
    # output language.
    generated_tokens = model.generate(
        input_ids=input_ids,
        forced_bos_token_id=tokenizer.lang_code_to_id[target_lang],
    )

    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
49
 
50
  if __name__ == '__main__':
51
- main()
 
1
  import streamlit as st
2
+ import PyPDF2
3
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
  from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
5
 
 
10
  model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
11
  tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
12
 
13
+ # Input option: Text area or file upload
14
+ input_option = st.radio("Select Input Option", ("Text", "PDF"))
15
 
16
+ if input_option == "Text":
17
+ input_text = st.text_area("Enter text to translate", "")
18
+ translate_button = st.button("Translate")
19
+ if translate_button:
20
+ translated_text = translate_text(input_text, model, tokenizer)
21
+ st.write("Translated Text:")
22
+ st.write(translated_text)
23
+ elif input_option == "PDF":
24
+ pdf_file = st.file_uploader("Upload PDF file", type=['pdf'])
25
+ if pdf_file is not None:
26
+ pdf_text = extract_text_from_pdf(pdf_file)
27
+ st.write("Extracted Text from PDF:")
28
+ st.write(pdf_text)
29
 
30
+ translate_button = st.button("Translate")
31
+ if translate_button:
32
+ translated_text = translate_text(pdf_text, model, tokenizer)
33
+ st.write("Translated Text:")
34
+ st.write(translated_text)
35
 
36
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: The PDF source handed to the PyPDF2 reader; the caller
            passes the object returned by ``st.file_uploader``.

    Returns:
        The text of all pages joined in page order. A page that yields no
        text contributes an empty string.
    """
    # PyPDF2 3.0 removed PdfFileReader / numPages / getPage / extractText.
    # Prefer the current names and fall back only on old releases that
    # lack them.
    reader_cls = getattr(PyPDF2, "PdfReader", None) or PyPDF2.PdfFileReader
    pdf_reader = reader_cls(pdf_file)

    page_texts = []
    for page in pdf_reader.pages:
        extract = getattr(page, "extract_text", None) or page.extractText
        # Guard against a falsy result so the join never sees None.
        page_texts.append(extract() or "")
    return "".join(page_texts)
43
 
44
def translate_text(input_text, model, tokenizer, target_lang=None):
    """Translate ``input_text`` into Hindi, Tamil, Telugu or a given language.

    Args:
        input_text: Source text to translate.
        model: Model exposing ``generate(input_ids=..., forced_bos_token_id=...)``.
        tokenizer: Tokenizer that is callable on text, exposes a
            ``lang_code_to_id`` mapping and a ``decode`` method.
        target_lang: Optional language code (a key of
            ``tokenizer.lang_code_to_id``, e.g. ``"hi_IN"``). When omitted,
            a Streamlit select box is rendered to ask the user.

    Returns:
        The decoded translation, or ``""`` when ``input_text`` is empty or
        only whitespace.

    Raises:
        KeyError: If ``target_lang`` is not in ``tokenizer.lang_code_to_id``.
    """
    if target_lang is None:
        # NOTE(review): this widget only exists while this function runs.
        # If the caller invokes it from inside an `if st.button(...)` branch,
        # changing the selection reruns the script with the button False and
        # the translation is not rendered again. Prefer choosing the language
        # before the button and passing it in through `target_lang`.
        language_codes = {"Hindi": "hi_IN", "Tamil": "ta_IN", "Telugu": "te_IN"}
        translate_to = st.selectbox(
            "Select language to translate", tuple(language_codes)
        )
        target_lang = language_codes[translate_to]

    # Nothing to translate: skip the model call instead of letting it
    # generate output for an empty prompt.
    if not input_text or not input_text.strip():
        return ""

    # Truncate so over-long input (e.g. the text of a whole PDF) is cut to
    # the tokenizer's configured maximum instead of being passed to the
    # model at full length.
    # NOTE(review): the truncated tail is not translated.
    input_ids = tokenizer(
        input_text, return_tensors="pt", truncation=True
    ).input_ids

    # Forcing the first generated token to the language code selects the
    # output language.
    generated_tokens = model.generate(
        input_ids=input_ids,
        forced_bos_token_id=tokenizer.lang_code_to_id[target_lang],
    )

    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
62
 
63
  if __name__ == '__main__':
64
+ main()