Spaces:

tahirsher
/

Multilingual_Translator-English-Urdu

Sleeping

App Files Community

tahirsher committed on Jan 31

Commit

add3a0f

verified ·

1 Parent(s): ab2f33a

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -15

app.py CHANGED Viewed

@@ -1,28 +1,21 @@
 import fitz  # PyMuPDF for PDF processing
 from PIL import Image  # For image processing
-from transformers import AutoTokenizer, AutoModelForImageTextToText, AutoModelForCausalLM, pipeline
 import streamlit as st
 import os
 import io
 from docx import Document  # For Word document processing
-# Load the TrOCR model for image-to-text
-trocr_tokenizer = AutoTokenizer.from_pretrained("microsoft/trocr-large-printed")
-trocr_model = AutoModelForImageTextToText.from_pretrained("microsoft/trocr-large-printed")
-# Load the DeepSeek model for text-to-text translation
-translation_tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-8B")
-translation_model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-8B")
-# Set up the translation pipeline
-translator = pipeline("text-generation", model=translation_model, tokenizer=translation_tokenizer)
 # Function to extract text from an image using TrOCR
 def extract_text_from_image(image):
-    inputs = trocr_tokenizer(image, return_tensors="pt").input_ids
-    outputs = trocr_model.generate(inputs)
-    text = trocr_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return text
 # Function to extract text from a PDF
 def extract_from_pdf(pdf_path):
@@ -51,7 +44,7 @@ def extract_from_word(docx_path):
 # Function to translate text to English
 def translate_text(text):
-    translated_text = translator(text, max_length=400)[0]['generated_text']
     return translated_text
 # Function to create a PDF from translated text

 import fitz  # PyMuPDF for PDF processing
 from PIL import Image  # For image processing
+from transformers import pipeline
 import streamlit as st
 import os
 import io
 from docx import Document  # For Word document processing
+# Load the TrOCR model for image-to-text (smaller model)
+trocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-printed")
+# Load the translation model (smaller model)
+translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
 # Function to extract text from an image using TrOCR
 def extract_text_from_image(image):
+    result = trocr_pipeline(image)
+    return result[0]['generated_text']
 # Function to extract text from a PDF
 def extract_from_pdf(pdf_path):
 # Function to translate text to English
 def translate_text(text):
+    translated_text = translator(text, max_length=400)[0]['translation_text']
     return translated_text
 # Function to create a PDF from translated text