Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

OmidSakaki commited on Jul 2

Commit

6ecc4f4

verified ·

1 Parent(s): 19169b4

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -61

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import gradio as gr
 import easyocr
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-from PIL import Image
 import numpy as np
 from typing import Tuple
@@ -14,116 +13,92 @@ class OCRProcessor:
     def extract_text(self, image: np.ndarray) -> str:
         """استخراج متن از تصویر با EasyOCR"""
         try:
-            results = self.reader.readtext(image)
-            return " ".join([result[1] for result in results]) if results else ""
         except Exception as e:
             raise RuntimeError(f"خطا در پردازش OCR: {str(e)}")
 class TextPostProcessor:
     def __init__(self):
-        # تنظیمات پیش‌پردازش متن
         self.replacements = {
-            'ي': 'ی', 'ك': 'ک',
-            '۰':'0', '۱':'1', '۲':'2', '۳':'3', '۴':'4',
-            '۵':'5', '۶':'6', '۷':'7', '۸':'8', '۹':'9'
         }
-        # بارگذاری مدل زبانی فارسی HooshvareLab/gpt2-fa
         try:
             self.llm = pipeline(
-                "text-generation",
-                model="HooshvareLab/gpt2-fa",
                 tokenizer="HooshvareLab/gpt2-fa"
             )
         except Exception as e:
             self.llm = None
-            print(f"خطا در بارگذاری مدل HooshvareLab/gpt2-fa: {str(e)}")
     def preprocess(self, text: str) -> str:
-        """پیش‌پردازش متن استخراج شده"""
         if not text:
             return ""
-        # نرمال‌سازی متن
         for old, new in self.replacements.items():
             text = text.replace(old, new)
         return " ".join(text.split())
     def enhance_with_llm(self, text: str) -> str:
-        """بهبود متن با مدل زبانی"""
         if not text or not self.llm:
             return text
         try:
-            prompt = f"متن زیر را ویرایش و روان‌تر کن:\n{text}\n"
-            enhanced = self.llm(
                 prompt,
-                max_length=len(prompt) + 60,
                 num_return_sequences=1,
                 do_sample=True,
-                temperature=0.8,
-                pad_token_id=0
             )
-            # فقط متن تولیدی پس از پرامپت را برگردان
-            gen_text = enhanced[0]['generated_text']
-            return gen_text[len(prompt):].strip() if gen_text.startswith(prompt) else gen_text.strip()
         except Exception as e:
-            print(f"خطا در بهبود متن با LLM: {str(e)}")
             return text
-## 2. پردازش اصلی
-# ----------------------------------
 def full_processing(image: np.ndarray) -> Tuple[str, str]:
-    """پایپلاین کامل پردازش تصویر"""
     try:
-        # 1. استخراج متن
         ocr_text = OCRProcessor().extract_text(image)
-        # 2. پیش‌پردازش
         post_processor = TextPostProcessor()
         cleaned_text = post_processor.preprocess(ocr_text)
-        # 3. بهبود با مدل زبانی
         enhanced_text = post_processor.enhance_with_llm(cleaned_text)
         return cleaned_text, enhanced_text
     except Exception as e:
         return f"خطا: {str(e)}", ""
 ## 3. رابط کاربری Gradio
-# ----------------------------------
-with gr.Blocks(title="پایپلاین OCR فارسی با LLM") as app:
     gr.Markdown("""
-    # سیستم پیشرفته پردازش متن فارسی
-    استخراج متن از تصاویر + پردازش با مدل زبانی
     """)
     with gr.Row():
         with gr.Column():
             img_input = gr.Image(label="تصویر ورودی", type="numpy")
             process_btn = gr.Button("پردازش تصویر", variant="primary")
         with gr.Column():
-            with gr.Tab("نتایج پردازش"):
                 raw_output = gr.Textbox(label="متن استخراج شده")
-                enhanced_output = gr.Textbox(label="متن بهبود یافته")
-            with gr.Tab("پیش‌نمایش"):
-                gr.Markdown("### تصویر ورودی")
-                img_preview = gr.Image(label="", interactive=False)
-    # پردازش خودکار هنگام آپلود تصویر
-    img_input.change(
-        fn=lambda x: x,
-        inputs=img_input,
-        outputs=img_preview
-    )
-    # پردازش اصلی هنگام کلیک دکمه
-    process_btn.click(
-        fn=full_processing,
-        inputs=img_input,
-        outputs=[raw_output, enhanced_output]
-    )
 if __name__ == "__main__":
     app.launch()

 import gradio as gr
 import easyocr
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 import numpy as np
 from typing import Tuple
     def extract_text(self, image: np.ndarray) -> str:
         """استخراج متن از تصویر با EasyOCR"""
         try:
+            results = self.reader.readtext(image, detail=0, paragraph=True)
+            return "\n".join(results) if results else ""
         except Exception as e:
             raise RuntimeError(f"خطا در پردازش OCR: {str(e)}")
 class TextPostProcessor:
     def __init__(self):
         self.replacements = {
+            'ي': 'ی', 'ك': 'ک',
+            '۰': '0', '۱': '1', '۲': '2', '۳': '3', '۴': '4',
+            '۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9'
         }
         try:
             self.llm = pipeline(
+                "text-generation",
+                model="HooshvareLab/gpt2-fa",
                 tokenizer="HooshvareLab/gpt2-fa"
             )
         except Exception as e:
+            print("خطا در بارگذاری مدل زبانی:", e)
             self.llm = None
     def preprocess(self, text: str) -> str:
+        """نرمال‌سازی ساده متن"""
         if not text:
             return ""
         for old, new in self.replacements.items():
             text = text.replace(old, new)
         return " ".join(text.split())
     def enhance_with_llm(self, text: str) -> str:
+        """بازنویسی یا بهبود متن با LLM فارسی"""
         if not text or not self.llm:
             return text
+        prompt = f"متن زیر را بازنویسی کن و به صورت روان و صحیح برگردان:\n{text}\nبازنویسی:"
         try:
+            output = self.llm(
                 prompt,
+                max_length=len(prompt) + len(text) + 60,
                 num_return_sequences=1,
                 do_sample=True,
+                temperature=0.9,
+                pad_token_id=0,
+                eos_token_id=2
             )
+            gen_text = output[0]['generated_text']
+            # فقط بخش بازنویسی شده را جدا کن
+            if "بازنویسی:" in gen_text:
+                gen_text = gen_text.split("بازنویسی:")[-1].strip()
+            # اگر بازنویسی مدل بی‌معنا یا کوتاه بود، همان متن را برگردان
+            if len(gen_text) < 8:
+                return text
+            return gen_text
         except Exception as e:
+            print("خطا در بازنویسی با LLM:", e)
             return text
+## 2. پایپلاین اصلی
 def full_processing(image: np.ndarray) -> Tuple[str, str]:
     try:
         ocr_text = OCRProcessor().extract_text(image)
         post_processor = TextPostProcessor()
         cleaned_text = post_processor.preprocess(ocr_text)
         enhanced_text = post_processor.enhance_with_llm(cleaned_text)
         return cleaned_text, enhanced_text
     except Exception as e:
         return f"خطا: {str(e)}", ""
 ## 3. رابط کاربری Gradio
+with gr.Blocks(title="پایپلاین OCR و بازنویسی متن فارسی") as app:
     gr.Markdown("""
+    # سیستم استخراج و بازنویسی متن فارسی از تصویر
+    تصویر را بارگذاری کنید، متن استخراج و سپس با مدل زبانی بازنویسی می‌شود.
     """)
     with gr.Row():
         with gr.Column():
             img_input = gr.Image(label="تصویر ورودی", type="numpy")
             process_btn = gr.Button("پردازش تصویر", variant="primary")
         with gr.Column():
+            with gr.Tab("متن استخراج شده"):
                 raw_output = gr.Textbox(label="متن استخراج شده")
+            with gr.Tab("متن بازنویسی شده"):
+                enhanced_output = gr.Textbox(label="متن بازنویسی شده")
+    img_input.change(fn=lambda x: x, inputs=img_input, outputs=img_preview)
+    process_btn.click(fn=full_processing, inputs=img_input, outputs=[raw_output, enhanced_output])
 if __name__ == "__main__":
     app.launch()