tahirsher committed on
Commit
ade99c4
·
verified ·
1 Parent(s): b7b5b8a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import PyPDF2
3
+ import docx2txt
4
+ from transformers import pipeline
5
+
6
# Hugging Face translation pipelines.
# Streamlit re-executes this script on every user interaction; caching the
# loader as a resource means each model is built once per server process
# instead of being reloaded on every rerun.
@st.cache_resource
def _load_translator(model_name):
    """Build (or fetch from Streamlit's resource cache) a translation pipeline."""
    return pipeline("translation", model=model_name)


translator_en = _load_translator("Helsinki-NLP/opus-mt-mul-en")  # Multilingual to English
translator_ur = _load_translator("Helsinki-NLP/opus-mt-mul-ur")  # Multilingual to Urdu
9
+
10
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF file.

    Args:
        file: Object handed to ``PyPDF2.PdfReader`` (the app passes the
            uploaded file here).

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages. A page whose extraction yields nothing
        contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # ``or ""`` guards against a page whose extraction yields None, which
    # would make string concatenation raise TypeError. ``join`` also avoids
    # repeated ``+=`` on an ever-growing string.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
17
+
18
def extract_text_from_word(file):
    """Return the text that ``docx2txt`` extracts from a Word document.

    Args:
        file: Object handed straight to ``docx2txt.process`` (the app passes
            the uploaded file here).
    """
    document_text = docx2txt.process(file)
    return document_text
21
+
22
def translate_text(text, target_language, max_chars=500):
    """Translate the beginning of ``text`` into the selected language.

    Args:
        text: Source text. Only the first ``max_chars`` characters are sent
            to the model; anything beyond that is not translated.
        target_language: ``"English"`` or ``"Urdu"``.
        max_chars: Number of leading characters to translate. Defaults to
            500, the limit that was previously hard-coded. Pass ``None`` to
            send the whole text.

    Returns:
        On success, whatever the selected translation pipeline returns (the
        caller in this file reads ``'translation_text'`` from each item).
        Otherwise a plain message string: ``"Invalid translation choice."``
        for an unknown language, or ``"No text found"`` when ``text`` is
        empty. Callers must therefore check for ``str`` before iterating.
    """
    if target_language not in ("English", "Urdu"):
        return "Invalid translation choice."
    if not text:
        return "No text found"
    # The pipeline is looked up only after validation so the error paths
    # never touch the models.
    translator = translator_en if target_language == "English" else translator_ur
    return translator(text[:max_chars])
29
+
30
# Streamlit UI: upload a document, preview its extracted text, and translate
# it on demand.
st.title("Multilingual Document Translator")
st.write("Translate PDF or Word documents to English and Urdu quickly.")

# File uploader and target-language selector
uploaded_file = st.file_uploader("Upload a PDF or Word file", type=["pdf", "docx"])
target_language = st.radio("Select the target language for translation", ["English", "Urdu"])

if uploaded_file:
    # Pick the extractor from the file extension. The comparison is
    # case-insensitive so a name such as "REPORT.PDF" is still parsed as a
    # PDF instead of being handed to the Word extractor.
    if uploaded_file.name.lower().endswith(".pdf"):
        text_content = extract_text_from_pdf(uploaded_file)
    else:
        text_content = extract_text_from_word(uploaded_file)

    # Show extracted text preview (first 500 characters only)
    st.subheader("Extracted Text (Preview)")
    st.write(text_content[:500] if text_content else "No content found in the file.")

    # Perform translation
    if st.button("Translate"):
        if text_content:
            st.subheader(f"Translated Text ({target_language})")
            translation_results = translate_text(text_content, target_language)
            if isinstance(translation_results, str):
                # translate_text reports problems as a plain message string;
                # indexing it like a pipeline result would raise TypeError.
                st.warning(translation_results)
            else:
                translations = "\n".join(
                    result['translation_text'] for result in translation_results
                )
                st.text_area("Translation Output", translations, height=300)
        else:
            st.warning("No text found to translate. Please upload a valid document.")