Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

OmidSakaki committed on Jul 2

Commit

12a2f23

verified ·

1 Parent(s): 4723eed

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -42

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import gradio as gr
 import easyocr
 import numpy as np
-from typing import Tuple
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 class OCRProcessor:
     def __init__(self):
@@ -13,66 +13,69 @@ class OCRProcessor:
             results = self.reader.readtext(image, detail=0, paragraph=True)
             return "\n".join(results) if results else ""
         except Exception as e:
-            raise RuntimeError(f"خطا در پردازش OCR: {str(e)}")
-class TextCorrector:
     def __init__(self):
-        model_name = "HooshvareLab/mt5-small-parsbert-uncased"
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
         except Exception as e:
-            raise RuntimeError(f"خطا در بارگذاری مدل زبانی: {str(e)}")
-    def correct(self, text: str) -> str:
-        if not text.strip():
-            return text
         try:
-            prompt = "بازنویسی و تصحیح: " + text
-            inputs = self.tokenizer(
-                prompt,
-                return_tensors="pt",
-                max_length=512,
-                truncation=True
             )
-            outputs = self.model.generate(
-                **inputs,
-                max_length=512,
-                num_beams=5,
-                early_stopping=True
             )
-            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         except Exception as e:
-            print(f"خطا در تصحیح متن: {e}")
-            return text
-def full_processing(image: np.ndarray) -> Tuple[str, str]:
-    try:
-        ocr_text = OCRProcessor().extract_text(image)
-        corrected_text = TextCorrector().correct(ocr_text)
-        return ocr_text, corrected_text
-    except Exception as e:
-        error_msg = f"خطا: {str(e)}"
-        return error_msg, error_msg
-with gr.Blocks(title="پایپلاین OCR + تصحیح خودکار متن فارسی") as app:
     gr.Markdown("""
-    # سیستم استخراج و تصحیح هوشمند متن فارسی
     """)
     with gr.Row():
         with gr.Column():
             img_input = gr.Image(label="تصویر ورودی", type="numpy")
-            process_btn = gr.Button("پردازش تصویر", variant="primary")
         with gr.Column():
-            raw_output = gr.Textbox(label="متن استخراج شده (خام)", lines=8, max_lines=None)
-            corrected_output = gr.Textbox(label="متن تصحیح شده (هوشمند)", lines=15, max_lines=None)
     process_btn.click(
-        fn=full_processing,
-        inputs=img_input,
-        outputs=[raw_output, corrected_output]
     )
 if __name__ == "__main__":

 import gradio as gr
 import easyocr
 import numpy as np
+from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+import torch
 class OCRProcessor:
     def __init__(self):
             results = self.reader.readtext(image, detail=0, paragraph=True)
             return "\n".join(results) if results else ""
         except Exception as e:
+            return f"خطا در پردازش OCR: {str(e)}"
class PersianQAModel:
    """Extractive question answering over Persian text.

    Wraps a Hugging Face tokenizer and question-answering model. The answer
    is the token span between the highest-scoring start and end positions
    predicted by the model.
    """

    # Checkpoint loaded by __init__.
    MODEL_NAME = "OmidSakaki/roberta_Persian_QA"
    # Maximum number of tokens (question + context) passed to the model.
    MAX_LENGTH = 512

    def __init__(self):
        """Load the tokenizer and the QA model.

        Raises:
            RuntimeError: If the tokenizer or model cannot be loaded. The
                underlying exception is chained as the cause.
        """
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
            self.model = AutoModelForQuestionAnswering.from_pretrained(self.MODEL_NAME)
        except Exception as e:
            raise RuntimeError(f"خطا در بارگذاری مدل پرسش و پاسخ: {str(e)}") from e

    def answer_question(self, context: str, question: str) -> str:
        """Return the answer to ``question`` extracted from ``context``.

        Args:
            context: Text to search for the answer.
            question: Question to answer.

        Returns:
            The extracted answer, or a user-facing message when an input is
            missing, no answer span is found, or the model fails. Errors are
            reported through the return value rather than raised.
        """
        # Treat None the same as an empty string instead of failing on .strip().
        if not context or not question or not context.strip() or not question.strip():
            return "متن یا سوال وارد نشده است."
        try:
            inputs = self.tokenizer(
                question,
                context,
                return_tensors="pt",
                truncation=True,
                max_length=self.MAX_LENGTH,
            )
            # Inference only: skip building the autograd graph.
            with torch.no_grad():
                outputs = self.model(**inputs)
            start = int(torch.argmax(outputs.start_logits))
            end = int(torch.argmax(outputs.end_logits)) + 1
            # The two argmaxes are independent, so the end may precede the start.
            if end <= start:
                return "جوابی یافت نشد."
            span_ids = inputs["input_ids"][0][start:end].tolist()
            # Let the tokenizer drop its own special tokens rather than
            # comparing against literals tied to one tokenizer family.
            answer = self.tokenizer.decode(span_ids, skip_special_tokens=True).strip()
            if not answer:
                return "جوابی یافت نشد."
            return answer
        except Exception as e:
            return f"خطا در مدل پرسش و پاسخ: {str(e)}"
+ocr_processor = OCRProcessor()
+qa_model = PersianQAModel()
def pipeline(image, question):
    """Extract text from an image and answer a question about that text.

    Args:
        image: Image as delivered by the image input component, or ``None``
            when nothing has been uploaded.
        question: The user's question.

    Returns:
        A ``(context, answer)`` tuple: the extracted text (or the OCR error
        message) and the model's answer or a status message.
    """
    # No image uploaded: there is nothing to OCR.
    if image is None:
        return "", "تصویری بارگذاری نشده است."
    context = ocr_processor.extract_text(image)
    # extract_text reports failures as a message string instead of raising;
    # do not hand that message to the QA model as if it were document text.
    if context.startswith("خطا در پردازش OCR:"):
        return context, "جوابی یافت نشد."
    answer = qa_model.answer_question(context, question)
    return context, answer
+with gr.Blocks(title="استخراج متن و پاسخ به سوال از تصویر فارسی") as app:
     gr.Markdown("""
+    # سیستم هوشمند پرسش و پاسخ از روی تصویر فارسی
+    1. تصویر را بارگذاری کنید تا متن استخراج شود.
+    2. سوال خود را به فارسی تایپ کنید.
+    3. دکمه «پاسخ» را بزنید.
     """)
     with gr.Row():
         with gr.Column():
             img_input = gr.Image(label="تصویر ورودی", type="numpy")
+            question_input = gr.Textbox(label="سوال شما به فارسی", placeholder="مثلاً: نویسنده این متن کیست؟", lines=1)
+            process_btn = gr.Button("پاسخ")
         with gr.Column():
+            context_output = gr.Textbox(label="متن استخراج شده", lines=10, max_lines=None, interactive=False)
+            answer_output = gr.Textbox(label="پاسخ مدل", lines=3, max_lines=None, interactive=False)
     process_btn.click(
+        fn=pipeline,
+        inputs=[img_input, question_input],
+        outputs=[context_output, answer_output]
     )
 if __name__ == "__main__":