Update app.py
app.py CHANGED
@@ -1,7 +1,4 @@
 import streamlit as st
-import PyPDF2
-import PyPDF2 as PDF
-from PyPDF2 import PdfReader
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
 
@@ -12,54 +9,43 @@ def main():
     model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
     tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
 
-    # Input
-
+    # Input text area
+    input_text = st.text_area("Enter text to translate", "")
 
-
-
-
-
-        translated_text = translate_text(input_text, model, tokenizer)
-        st.write("Translated Text:")
-        st.write(translated_text)
-    elif input_option == "PDF":
-        pdf_file = st.file_uploader("Upload PDF file", type=['pdf'])
-        if pdf_file is not None:
-            pdf_text = extract_text_from_pdf(pdf_file)
-            st.write("Extracted Text from PDF:")
-            st.write(pdf_text)
+    # Translation buttons
+    translate_hindi = st.button("Hindi")
+    translate_tamil = st.button("Tamil")
+    translate_telugu = st.button("Telugu")
 
-
-
-
-
-        st.write(translated_text)
+    if translate_hindi:
+        translated_text = translate_text(input_text, model, tokenizer, target_lang="hi_IN")
+        st.write("Translated Text (Hindi):")
+        st.write(translated_text)
 
-
-
-
-
-        text += page.extract_text()
-    return text
+    if translate_tamil:
+        translated_text = translate_text(input_text, model, tokenizer, target_lang="ta_IN")
+        st.write("Translated Text (Tamil):")
+        st.write(translated_text)
 
-
+    if translate_telugu:
+        translated_text = translate_text(input_text, model, tokenizer, target_lang="te_IN")
+        st.write("Translated Text (Telugu):")
+        st.write(translated_text)
+
+def translate_text(input_text, model, tokenizer, target_lang):
+    # Tokenize input text
     input_ids = tokenizer(input_text, return_tensors="pt").input_ids
 
-
-    target_lang = ""
-    if translate_to == "Hindi":
-        target_lang = "hi_IN"
-    elif translate_to == "Tamil":
-        target_lang = "ta_IN"
-    elif translate_to == "Telugu":
-        target_lang = "te_IN"
-
+    # Generate translation
     generated_tokens = model.generate(
         input_ids=input_ids,
         forced_bos_token_id=tokenizer.lang_code_to_id[target_lang]
     )
+
+    # Decode translated text
     translated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
+
     return translated_text
 
 if __name__ == '__main__':
-main()
+    main()
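
For context on what the new code path does, here is a minimal standalone sketch of the MBART-50 one-to-many translation that translate_text now wraps, runnable outside Streamlit. The checkpoint, the forced_bos_token_id usage, and the language codes are taken from the diff above; the helper name translate_demo, the sample sentence, and the choice of hi_IN as the target are illustrative assumptions, not part of the commit.

from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# Same English-to-many MBART-50 checkpoint that app.py loads.
model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")

def translate_demo(text, target_lang):
    # Tokenize the English input.
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    # Force the decoder to start with the target language code
    # ("hi_IN", "ta_IN", or "te_IN" in the app) so the one-to-many
    # model generates in that language.
    generated_tokens = model.generate(
        input_ids=input_ids,
        forced_bos_token_id=tokenizer.lang_code_to_id[target_lang],
    )
    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

# Illustrative call; sentence and target language are assumptions for the example.
print(translate_demo("The weather is nice today.", "hi_IN"))

The per-language buttons introduced by this commit replace the old dropdown-plus-translate flow, so each button press maps directly to one target language code passed into translate_text.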