tahirsher committed on
Commit
04f9dd5
·
verified ·
1 Parent(s): d76d653

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -14
app.py CHANGED
@@ -2,27 +2,34 @@ import streamlit as st
2
  import PyPDF2
3
  import docx2txt
4
  from transformers import pipeline
 
5
 
6
- # Initialize Hugging Face Translation Pipelines (Force PyTorch Backend)
7
- try:
8
- translator_en = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en", framework="pt")
9
- translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-ur", framework="pt")
10
- except Exception as e:
11
- st.error(f"Failed to initialize translation models. Error: {e}")
 
 
 
 
 
 
12
 
13
  def extract_text_from_pdf(file):
14
- """Extract text from PDF."""
15
  text = ""
16
  try:
17
  pdf_reader = PyPDF2.PdfReader(file)
18
  for page in pdf_reader.pages:
19
- text += page.extract_text()
20
  except Exception as e:
21
  st.error(f"Error extracting text from PDF: {e}")
22
  return text
23
 
24
  def extract_text_from_word(file):
25
- """Extract text from Word file."""
26
  try:
27
  return docx2txt.process(file)
28
  except Exception as e:
@@ -30,8 +37,8 @@ def extract_text_from_word(file):
30
  return ""
31
 
32
  def translate_text(text, translator):
33
- """Translate text in chunks using the given translator."""
34
- max_chunk_size = 512 # Limit due to token constraints
35
  text_chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
36
  translations = []
37
 
@@ -66,11 +73,13 @@ if uploaded_file:
66
  if st.button("Translate"):
67
  if text_content:
68
  st.subheader(f"Translated Text ({target_language})")
69
- if target_language == "English":
70
  translated_text = translate_text(text_content, translator_en)
71
- else:
72
  translated_text = translate_text(text_content, translator_ur)
73
-
 
 
74
  st.text_area("Translation Output", translated_text, height=300)
75
  else:
76
  st.warning("No text found to translate. Please upload a valid document.")
 
2
  import PyPDF2
3
  import docx2txt
4
  from transformers import pipeline
5
+ import sentencepiece
6
 
7
+ # Initialize translation pipelines
8
+ def load_translation_models():
9
+ """Load translation models with error handling."""
10
+ try:
11
+ translator_en = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en", framework="pt")
12
+ translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-ur", framework="pt")
13
+ return translator_en, translator_ur
14
+ except Exception as e:
15
+ st.error(f"Error initializing translation models: {e}")
16
+ return None, None
17
+
18
+ translator_en, translator_ur = load_translation_models()
19
 
20
  def extract_text_from_pdf(file):
21
+ """Extract text from a PDF file."""
22
  text = ""
23
  try:
24
  pdf_reader = PyPDF2.PdfReader(file)
25
  for page in pdf_reader.pages:
26
+ text += page.extract_text() or ""
27
  except Exception as e:
28
  st.error(f"Error extracting text from PDF: {e}")
29
  return text
30
 
31
  def extract_text_from_word(file):
32
+ """Extract text from a Word file."""
33
  try:
34
  return docx2txt.process(file)
35
  except Exception as e:
 
37
  return ""
38
 
39
  def translate_text(text, translator):
40
+ """Translate text in manageable chunks."""
41
+ max_chunk_size = 512
42
  text_chunks = [text[i:i + max_chunk_size] for i in range(0, len(text), max_chunk_size)]
43
  translations = []
44
 
 
73
  if st.button("Translate"):
74
  if text_content:
75
  st.subheader(f"Translated Text ({target_language})")
76
+ if target_language == "English" and translator_en:
77
  translated_text = translate_text(text_content, translator_en)
78
+ elif target_language == "Urdu" and translator_ur:
79
  translated_text = translate_text(text_content, translator_ur)
80
+ else:
81
+ st.warning("Translation model not loaded successfully.")
82
+
83
  st.text_area("Translation Output", translated_text, height=300)
84
  else:
85
  st.warning("No text found to translate. Please upload a valid document.")