Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

OmidSakaki committed on Jul 2

Commit

db9549c

verified ·

1 Parent(s): 0d931e0

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -37

app.py CHANGED Viewed

@@ -2,32 +2,22 @@ import gradio as gr
 import time
 import numpy as np
 from PIL import Image
-from paddleocr import PaddleOCR
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 import easyocr
-import pytesseract
 from doctr.models import ocr_predictor
-# Initialize all models
 models = {
-    "PaddleOCR": PaddleOCR(lang='en'),
     "EasyOCR": easyocr.Reader(['en']),
     "TrOCR": {
         "processor": TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed"),
         "model": VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
     },
-    "Tesseract": None,  # Initialized by pytesseract
     "DocTR": ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
 }
-def run_paddleocr(image):
-    try:
-        result = models["PaddleOCR"].ocr(np.array(image))
-        return ' '.join([line[1][0] for line in result[0]]) if result else ''
-    except Exception as e:
-        return f"Error: {str(e)}"
 def run_easyocr(image):
     try:
         result = models["EasyOCR"].readtext(np.array(image), detail=0)
         return ' '.join(result) if result else ''
@@ -35,6 +25,7 @@ def run_easyocr(image):
         return f"Error: {str(e)}"
 def run_trocr(image):
     try:
         pixel_values = models["TrOCR"]["processor"](image, return_tensors="pt").pixel_values
         generated_ids = models["TrOCR"]["model"].generate(pixel_values)
@@ -42,13 +33,8 @@ def run_trocr(image):
     except Exception as e:
         return f"Error: {str(e)}"
-def run_tesseract(image):
-    try:
-        return pytesseract.image_to_string(image, lang='eng')
-    except Exception as e:
-        return f"Error: {str(e)}"
 def run_doctr(image):
     try:
         if isinstance(image, Image.Image):
             image = np.array(image)
@@ -59,6 +45,7 @@ def run_doctr(image):
         return f"Error: {str(e)}"
 def compare_models(image):
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
     image = image.convert("RGB")
@@ -67,10 +54,8 @@ def compare_models(image):
     times = {}
     # Run all OCR models
-    for name, func in [("PaddleOCR", run_paddleocr),
-                      ("EasyOCR", run_easyocr),
                       ("TrOCR", run_trocr),
-                      ("Tesseract", run_tesseract),
                       ("DocTR", run_doctr)]:
         start = time.time()
         results[name] = func(image)
@@ -81,49 +66,56 @@ def compare_models(image):
     for name in results:
         table_rows.append(f"""
         <tr>
-            <td style="padding: 8px; border: 1px solid #ddd; text-align: center;">{name}</td>
             <td style="padding: 8px; border: 1px solid #ddd;">{results[name]}</td>
-            <td style="padding: 8px; border: 1px solid #ddd; text-align: center;">{times[name]:.3f}</td>
         </tr>
         """)
     comparison = f"""
-    <table style="width:100%; border-collapse: collapse; margin-bottom: 20px;">
-        <tr style="background-color: #f2f2f2;">
-            <th style="padding: 8px; border: 1px solid #ddd; text-align: center;">Model</th>
-            <th style="padding: 8px; border: 1px solid #ddd; text-align: center;">Extracted Text</th>
-            <th style="padding: 8px; border: 1px solid #ddd; text-align: center;">Time (s)</th>
         </tr>
         {''.join(table_rows)}
     </table>
     """
-    return comparison, *results.values()
-# Gradio Interface
-with gr.Blocks(title="Advanced OCR Comparison") as demo:
-    gr.Markdown("## 🚀 Advanced English OCR Comparison (5 Models)")
     with gr.Row():
         with gr.Column():
-            img_input = gr.Image(label="Upload Document", type="pil")
             gr.Examples(
                 examples=["sample1.jpg", "sample2.png"],
                 inputs=img_input,
-                label="Sample Images"
             )
-            submit_btn = gr.Button("Run Comparison", variant="primary")
         with gr.Column():
             comparison = gr.HTML(label="Comparison Results")
             with gr.Accordion("Detailed Results", open=False):
                 gr.Markdown("### Individual Model Outputs")
-                outputs = [gr.Textbox(label=name) for name in models]
     submit_btn.click(
         fn=compare_models,
         inputs=img_input,
-        outputs=[comparison, *outputs]
     )
 if __name__ == "__main__":

 import time
 import numpy as np
 from PIL import Image
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 import easyocr
 from doctr.models import ocr_predictor
+# Initialize models
 models = {
     "EasyOCR": easyocr.Reader(['en']),
     "TrOCR": {
         "processor": TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed"),
         "model": VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
     },
     "DocTR": ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
 }
 def run_easyocr(image):
+    """Run EasyOCR on image"""
     try:
         result = models["EasyOCR"].readtext(np.array(image), detail=0)
         return ' '.join(result) if result else ''
         return f"Error: {str(e)}"
 def run_trocr(image):
+    """Run TrOCR on image"""
     try:
         pixel_values = models["TrOCR"]["processor"](image, return_tensors="pt").pixel_values
         generated_ids = models["TrOCR"]["model"].generate(pixel_values)
     except Exception as e:
         return f"Error: {str(e)}"
 def run_doctr(image):
+    """Run DocTR on image"""
     try:
         if isinstance(image, Image.Image):
             image = np.array(image)
         return f"Error: {str(e)}"
 def compare_models(image):
+    """Compare all OCR models"""
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
     image = image.convert("RGB")
     times = {}
     # Run all OCR models
+    for name, func in [("EasyOCR", run_easyocr),
                       ("TrOCR", run_trocr),
                       ("DocTR", run_doctr)]:
         start = time.time()
         results[name] = func(image)
     for name in results:
         table_rows.append(f"""
         <tr>
+            <td style="padding: 8px; border: 1px solid #ddd; text-align: center; font-weight: bold;">{name}</td>
             <td style="padding: 8px; border: 1px solid #ddd;">{results[name]}</td>
+            <td style="padding: 8px; border: 1px solid #ddd; text-align: center;">{times[name]:.3f}s</td>
         </tr>
         """)
     comparison = f"""
+    <div style="overflow-x: auto;">
+    <table style="width:100%; border-collapse: collapse; margin: 15px 0; font-family: Arial, sans-serif;">
+        <tr style="background-color: #4CAF50; color: white;">
+            <th style="padding: 12px; border: 1px solid #ddd; text-align: center;">Model</th>
+            <th style="padding: 12px; border: 1px solid #ddd; text-align: center;">Extracted Text</th>
+            <th style="padding: 12px; border: 1px solid #ddd; text-align: center;">Processing Time</th>
         </tr>
         {''.join(table_rows)}
     </table>
+    </div>
     """
+    return comparison, results['EasyOCR'], results['TrOCR'], results['DocTR']
+# Create Gradio interface
+with gr.Blocks(title="English OCR Comparison", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🚀 English OCR Model Comparison
+    Compare the performance of top OCR models for English text extraction
+    """)
     with gr.Row():
         with gr.Column():
+            img_input = gr.Image(label="Upload Image", type="pil")
             gr.Examples(
                 examples=["sample1.jpg", "sample2.png"],
                 inputs=img_input,
+                label="Try these sample images"
             )
+            submit_btn = gr.Button("Compare Models", variant="primary")
         with gr.Column():
             comparison = gr.HTML(label="Comparison Results")
             with gr.Accordion("Detailed Results", open=False):
                 gr.Markdown("### Individual Model Outputs")
+                easy_output = gr.Textbox(label="EasyOCR")
+                trocr_output = gr.Textbox(label="TrOCR")
+                doctr_output = gr.Textbox(label="DocTR")
     submit_btn.click(
         fn=compare_models,
         inputs=img_input,
+        outputs=[comparison, easy_output, trocr_output, doctr_output]
     )
 if __name__ == "__main__":