Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

OmidSakaki committed on Jul 2

Commit

57fa964

verified ·

1 Parent(s): 28a9f71

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -22

app.py CHANGED Viewed

@@ -2,7 +2,9 @@ import gradio as gr
 import easyocr
 import numpy as np
 from typing import Tuple
 class OCRProcessor:
     def __init__(self):
         self.reader = easyocr.Reader(['fa'])
@@ -14,33 +16,49 @@ class OCRProcessor:
         except Exception as e:
             raise RuntimeError(f"خطا در پردازش OCR: {str(e)}")
-class TextPostProcessor:
     def __init__(self):
-        self.replacements = {
-            'ي': 'ی', 'ك': 'ک',
-            '۰': '0', '۱': '1', '۲': '2', '۳': '3', '۴': '4',
-            '۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9'
-        }
-    def preprocess(self, text: str) -> str:
-        if not text:
-            return ""
-        for old, new in self.replacements.items():
-            text = text.replace(old, new)
-        return " ".join(text.split())
 def full_processing(image: np.ndarray) -> Tuple[str, str]:
     try:
         ocr_text = OCRProcessor().extract_text(image)
-        post_processor = TextPostProcessor()
-        cleaned_text = post_processor.preprocess(ocr_text)
-        return cleaned_text, cleaned_text
     except Exception as e:
-        return f"خطا: {str(e)}", ""
-with gr.Blocks(title="پایپلاین OCR و تصحیح متن فارسی") as app:
     gr.Markdown("""
-    # استخراج و تصحیح متن فارسی از تصویر
     """)
     with gr.Row():
@@ -48,10 +66,14 @@ with gr.Blocks(title="پایپلاین OCR و تصحیح متن فارسی") as
             img_input = gr.Image(label="تصویر ورودی", type="numpy")
             process_btn = gr.Button("پردازش تصویر", variant="primary")
         with gr.Column():
-            raw_output = gr.Textbox(label="متن استخراج شده", lines=8, max_lines=None)
-            enhanced_output = gr.Textbox(label="متن نهایی", lines=10, max_lines=None)
-    process_btn.click(fn=full_processing, inputs=img_input, outputs=[raw_output, enhanced_output])
 if __name__ == "__main__":
     app.launch()

 import easyocr
 import numpy as np
 from typing import Tuple
+from transformers import pipeline
+# --- 1. OCR class for extracting text from an image ---
 class OCRProcessor:
     def __init__(self):
         self.reader = easyocr.Reader(['fa'])
         except Exception as e:
             raise RuntimeError(f"خطا در پردازش OCR: {str(e)}")
+# --- 2. Text-correction class backed by a language model ---
+class TextCorrector:
     def __init__(self):
+        # Use the ParsBERT model to correct Persian text. NOTE(review): the name suggests a BERT-style checkpoint — confirm this model id exists on the Hub and supports the "text2text-generation" task.
+        self.corrector = pipeline(
+            "text2text-generation",
+            model="persiannlp/parsbert-uncased",  # Persian language model
+            tokenizer="persiannlp/parsbert-uncased"
+        )
+    def correct(self, text: str) -> str:
+        if not text.strip():  # whitespace-only or empty input is returned unchanged
+            return text
+        try:
+            corrected = self.corrector(
+                text,
+                max_length=512,
+                num_beams=5,
+                early_stopping=True
+            )
+            return corrected[0]['generated_text']  # text of the first result returned by the pipeline
+        except Exception as e:  # best-effort: on any failure, print the error and fall back to the input text
+            print(f"خطا در تصحیح متن: {e}")
+            return text
+# --- 3. Full pipeline (OCR + automatic correction) ---
 def full_processing(image: np.ndarray) -> Tuple[str, str]:
     try:
+        # Extract text from the image. NOTE(review): a new OCRProcessor (and a new TextCorrector below) is constructed on every call — consider building them once and reusing them.
         ocr_text = OCRProcessor().extract_text(image)
+        # Correct the text with the language model
+        corrected_text = TextCorrector().correct(ocr_text)
+        return ocr_text, corrected_text  # (raw OCR text, corrected text)
     except Exception as e:
+        error_msg = f"خطا: {str(e)}"
+        return error_msg, error_msg  # the same error message is returned for both outputs
+# --- 4. Gradio user interface ---
+with gr.Blocks(title="پایپلاین OCR + تصحیح خودکار متن فارسی") as app:
     gr.Markdown("""
+    # استخراج و تصحیح هوشمند متن فارسی از تصویر
     """)
     with gr.Row():
             img_input = gr.Image(label="تصویر ورودی", type="numpy")
             process_btn = gr.Button("پردازش تصویر", variant="primary")
         with gr.Column():
+            raw_output = gr.Textbox(label="متن استخراج شده (خام)", lines=8, interactive=True)
+            corrected_output = gr.Textbox(label="متن تصحیح شده (هوشمند)", lines=10, interactive=True)
+    process_btn.click(
+        fn=full_processing,
+        inputs=img_input,
+        outputs=[raw_output, corrected_output]
+    )
 if __name__ == "__main__":
     app.launch()