Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

OmidSakaki committed on Jul 2

Commit

c5a772e

verified ·

1 Parent(s): 8f46e75

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -38

app.py CHANGED Viewed

@@ -3,56 +3,117 @@ import easyocr
 from transformers import pipeline
 from PIL import Image
 import numpy as np
-# Initialize EasyOCR for Persian
-reader = easyocr.Reader(['fa'])
-# Initialize text processing pipeline
-try:
-    # استفاده از مدل محلی یا مدل‌های عمومی
-    text_processor = pipeline("text-generation", model="gpt2")  # مدل جایگزین
-except Exception as e:
-    text_processor = None
-def run_ocr(image):
-    """استخراج متن از تصویر با EasyOCR"""
-    try:
-        if isinstance(image, Image.Image):
-            image = np.array(image)
-        results = reader.readtext(image)
-        return " ".join([result[1] for result in results]) if results else "متنی یافت نشد!"
-    except Exception as e:
-        return f"خطا در OCR: {str(e)}"
-def process_text(text):
-    """پردازش ساده متن"""
-    if text == "متنی یافت نشد!":
-        return text
-    # اگر پردازشگر متن وجود داشت از آن استفاده کن
-    if text_processor:
         try:
-            return text_processor(text, max_length=50)[0]['generated_text']
         except:
-            return text  # اگر خطا رخ داد متن اصلی را برگردان
-    return text  # اگر پردازشگر متن وجود نداشت
-with gr.Blocks(title="سیستم OCR فارسی") as app:
-    gr.Markdown("## استخراج متن فارسی از تصاویر")
     with gr.Row():
         with gr.Column():
-            img_input = gr.Image(label="تصویر ورودی", type="pil")
-            btn = gr.Button("پردازش تصویر", variant="primary")
         with gr.Column():
-            ocr_output = gr.Textbox(label="متن استخراج شده")
-            processed_output = gr.Textbox(label="متن پردازش شده", visible=False)  # غیرفعال شده
-    btn.click(
-        fn=lambda x: (run_ocr(x), process_text(run_ocr(x))),
         inputs=img_input,
-        outputs=[ocr_output, processed_output]
     )
 if __name__ == "__main__":

 from transformers import pipeline
 from PIL import Image
 import numpy as np
+import os
+from typing import Tuple
+## 1. Initial setup and models
+# ----------------------------------
+class OCRProcessor:
+    """Wrapper around an ``easyocr.Reader`` that turns an image into plain text."""
+
+    def __init__(self, languages=('fa',)):
+        """Create the underlying EasyOCR reader.
+
+        Args:
+            languages: Iterable of language codes passed to ``easyocr.Reader``.
+                Defaults to Persian (``'fa'``), which is what the reader was
+                previously hard-coded to.
+        """
+        self.reader = easyocr.Reader(list(languages))
+
+    def extract_text(self, image: np.ndarray) -> str:
+        """Extract text from an image with EasyOCR.
+
+        Args:
+            image: Image handed straight to ``self.reader.readtext``.
+
+        Returns:
+            The element at index 1 of every OCR result, joined with single
+            spaces, or an empty string when the reader returns no results.
+
+        Raises:
+            RuntimeError: If reading or joining the results fails. The
+                original exception is attached as ``__cause__``.
+        """
+        try:
+            results = self.reader.readtext(image)
+            return " ".join(result[1] for result in results) if results else ""
+        except Exception as e:
+            # Chain the cause so the underlying OCR failure stays in the traceback.
+            raise RuntimeError(f"خطا در پردازش OCR: {str(e)}") from e
+class TextPostProcessor:
+    """Normalise OCR output and optionally rewrite it with a text-generation model."""
+
+    def __init__(self):
+        """Build the character replacement table and try to load the language model."""
+        # Text normalisation table: Arabic yeh/kaf to their Persian forms,
+        # and Persian digits to ASCII digits.
+        self.replacements = {
+            'ي': 'ی', 'ك': 'ک',
+            '۰':'0', '۱':'1', '۲':'2', '۳':'3', '۴':'4',
+            '۵':'5', '۶':'6', '۷':'7', '۸':'8', '۹':'9'
+        }
+        # Load the language model; on failure keep None so that
+        # enhance_with_llm() degrades to returning its input unchanged.
+        try:
+            self.llm = pipeline("text-generation", model="gpt2")
+        except Exception:
+            # Not a bare ``except``: KeyboardInterrupt/SystemExit must still propagate.
+            self.llm = None
+
+    def preprocess(self, text: str) -> str:
+        """Normalise extracted text.
+
+        Applies every entry of ``self.replacements`` and then collapses runs
+        of whitespace into single spaces, trimming both ends.
+
+        Args:
+            text: Raw OCR text; may be empty.
+
+        Returns:
+            The normalised text, or ``""`` when ``text`` is empty.
+        """
+        if not text:
+            return ""
+        for old, new in self.replacements.items():
+            text = text.replace(old, new)
+        return " ".join(text.split())
+
+    def enhance_with_llm(self, text: str) -> str:
+        """Ask the language model to rewrite ``text``.
+
+        Args:
+            text: Pre-processed text to improve.
+
+        Returns:
+            The model's continuation with surrounding whitespace stripped.
+            ``text`` is returned unchanged when it is empty, when no model
+            is loaded, when generation fails, or when the model produces
+            nothing.
+        """
+        if not text or not self.llm:
+            return text
         try:
+            enhanced = self.llm(
+                f"اصلاح و بازنویسی متن فارسی زیر:\n{text}\n\nمتن بهبود یافته:",
+                # max_new_tokens bounds only the generated part; max_length
+                # also counted the prompt, leaving no room for long inputs.
+                max_new_tokens=200,
+                num_return_sequences=1,
+                # Return only the continuation, not the prompt echoed back.
+                return_full_text=False,
+            )
+            generated = enhanced[0]['generated_text'].strip()
+        except Exception:
+            # Best effort: a failing model must not break the OCR result.
+            return text
+        return generated or text
+## 2. Main processing
+# ----------------------------------
+# Shared processor instances, created on first use. Their constructors call
+# easyocr.Reader(...) and pipeline(...), so they are built once instead of
+# on every request.
+_ocr_processor = None
+_post_processor = None
+
+
+def _get_ocr_processor():
+    """Return the shared OCRProcessor, creating it on first use."""
+    global _ocr_processor
+    if _ocr_processor is None:
+        _ocr_processor = OCRProcessor()
+    return _ocr_processor
+
+
+def _get_post_processor():
+    """Return the shared TextPostProcessor, creating it on first use."""
+    global _post_processor
+    if _post_processor is None:
+        _post_processor = TextPostProcessor()
+    return _post_processor
+
+
+def full_processing(image: np.ndarray) -> Tuple[str, str]:
+    """Run the complete image-to-text pipeline.
+
+    Steps: OCR extraction, text normalisation, then language-model
+    enhancement of the normalised text.
+
+    Args:
+        image: Image passed on to ``OCRProcessor.extract_text``.
+
+    Returns:
+        ``(cleaned_text, enhanced_text)`` on success. If any step raises,
+        the first element is an error message built from the exception and
+        the second is an empty string.
+    """
+    try:
+        # 1. Text extraction
+        ocr_text = _get_ocr_processor().extract_text(image)
+        # 2. Pre-processing
+        post_processor = _get_post_processor()
+        cleaned_text = post_processor.preprocess(ocr_text)
+        # 3. Enhancement with the language model
+        enhanced_text = post_processor.enhance_with_llm(cleaned_text)
+        return cleaned_text, enhanced_text
+    except Exception as e:
+        # UI boundary: report the failure in the first output box instead of raising.
+        return f"خطا: {str(e)}", ""
+## 3. Gradio user interface
+# ----------------------------------
+# Layout: one row with two columns. The left column holds the image input and
+# the trigger button; the right column holds a results tab and a preview tab.
+with gr.Blocks(title="پایپلاین OCR فارسی با LLM") as app:
+    gr.Markdown("""
+    # سیستم پیشرفته پردازش متن فارسی
+    استخراج متن از تصاویر + پردازش با مدل زبانی
+    """)
     with gr.Row():
         with gr.Column():
+            img_input = gr.Image(label="تصویر ورودی", type="numpy")
+            process_btn = gr.Button("پردازش تصویر", variant="primary")
         with gr.Column():
+            with gr.Tab("نتایج پردازش"):
+                raw_output = gr.Textbox(label="متن استخراج شده")
+                enhanced_output = gr.Textbox(label="متن بهبود یافته")
+            with gr.Tab("پیش‌نمایش"):
+                gr.Markdown("### تصویر ورودی")
+                img_preview = gr.Image(label="", interactive=False)
+    # Whenever the input image changes, copy it unchanged into the preview tab
+    # (this handler does not run OCR).
+    img_input.change(
+        fn=lambda x: x,
+        inputs=img_input,
+        outputs=img_preview
+    )
+    # Run the full pipeline when the button is clicked; the two returned
+    # strings fill raw_output and enhanced_output in that order.
+    process_btn.click(
+        fn=full_processing,
         inputs=img_input,
+        outputs=[raw_output, enhanced_output]
     )
 if __name__ == "__main__":