puppala13 committed on
Commit
31c25aa
·
verified ·
1 Parent(s): c846977

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -40
app.py CHANGED
@@ -1,7 +1,4 @@
1
  import streamlit as st
2
- import PyPDF2
3
- import PyPDF2 as PDF
4
- from PyPDF2 import PdfReader
5
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
6
  from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
7
 
@@ -12,54 +9,43 @@ def main():
12
  model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
13
  tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
14
 
15
- # Input option: Text area or file upload
16
- input_option = st.radio("Select Input Option", ("Text", "PDF"))
17
 
18
- if input_option == "Text":
19
- input_text = st.text_area("Enter text to translate", "")
20
- translate_button = st.button("Translate")
21
- if translate_button:
22
- translated_text = translate_text(input_text, model, tokenizer)
23
- st.write("Translated Text:")
24
- st.write(translated_text)
25
- elif input_option == "PDF":
26
- pdf_file = st.file_uploader("Upload PDF file", type=['pdf'])
27
- if pdf_file is not None:
28
- pdf_text = extract_text_from_pdf(pdf_file)
29
- st.write("Extracted Text from PDF:")
30
- st.write(pdf_text)
31
 
32
- translate_button = st.button("Translate")
33
- if translate_button:
34
- translated_text = translate_text(pdf_text, model, tokenizer)
35
- st.write("Translated Text:")
36
- st.write(translated_text)
37
 
38
- def extract_text_from_pdf(pdf_file):
39
- pdf_reader = PdfReader(pdf_file)
40
- text = ""
41
- for page in pdf_reader.pages:
42
- text += page.extract_text()
43
- return text
44
 
45
- def translate_text(input_text, model, tokenizer):
 
 
 
 
 
 
46
  input_ids = tokenizer(input_text, return_tensors="pt").input_ids
47
 
48
- translate_to = st.selectbox("Select language to translate", ("Hindi", "Tamil", "Telugu"))
49
- target_lang = ""
50
- if translate_to == "Hindi":
51
- target_lang = "hi_IN"
52
- elif translate_to == "Tamil":
53
- target_lang = "ta_IN"
54
- elif translate_to == "Telugu":
55
- target_lang = "te_IN"
56
-
57
  generated_tokens = model.generate(
58
  input_ids=input_ids,
59
  forced_bos_token_id=tokenizer.lang_code_to_id[target_lang]
60
  )
 
 
61
  translated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
 
62
  return translated_text
63
 
64
  if __name__ == '__main__':
65
- main()
 
1
  import streamlit as st
 
 
 
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
  from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
4
 
 
9
  model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
10
  tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
11
 
12
+ # Input text area
13
+ input_text = st.text_area("Enter text to translate", "")
14
 
15
+ # Translation buttons
16
+ translate_hindi = st.button("Hindi")
17
+ translate_tamil = st.button("Tamil")
18
+ translate_telugu = st.button("Telugu")
 
 
 
 
 
 
 
 
 
19
 
20
+ if translate_hindi:
21
+ translated_text = translate_text(input_text, model, tokenizer, target_lang="hi_IN")
22
+ st.write("Translated Text (Hindi):")
23
+ st.write(translated_text)
 
24
 
25
+ if translate_tamil:
26
+ translated_text = translate_text(input_text, model, tokenizer, target_lang="ta_IN")
27
+ st.write("Translated Text (Tamil):")
28
+ st.write(translated_text)
 
 
29
 
30
+ if translate_telugu:
31
+ translated_text = translate_text(input_text, model, tokenizer, target_lang="te_IN")
32
+ st.write("Translated Text (Telugu):")
33
+ st.write(translated_text)
34
+
35
+ def translate_text(input_text, model, tokenizer, target_lang):
36
+ # Tokenize input text
37
  input_ids = tokenizer(input_text, return_tensors="pt").input_ids
38
 
39
+ # Generate translation
 
 
 
 
 
 
 
 
40
  generated_tokens = model.generate(
41
  input_ids=input_ids,
42
  forced_bos_token_id=tokenizer.lang_code_to_id[target_lang]
43
  )
44
+
45
+ # Decode translated text
46
  translated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
47
+
48
  return translated_text
49
 
50
  if __name__ == '__main__':
51
+ main()