Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

OmidSakaki committed on Jul 2

Commit

c55b6a8

verified ·

1 Parent(s): 77d9d02

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ class OCRProcessor:
 class TextCorrector:
     def __init__(self):
-        model_name = "persiannlp/mt5-small-parsinlu-arc-comqa-question"
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(model_name)
             self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
@@ -29,20 +29,19 @@ class TextCorrector:
             return text
         try:
             inputs = self.tokenizer(
-                "اصلاح متن: " + text,
                 return_tensors="pt",
                 max_length=512,
                 truncation=True
             )
             outputs = self.model.generate(
                 **inputs,
                 max_length=512,
                 num_beams=5,
                 early_stopping=True
             )
             return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         except Exception as e:
             print(f"خطا در تصحیح متن: {e}")
@@ -50,12 +49,8 @@ class TextCorrector:
 def full_processing(image: np.ndarray) -> Tuple[str, str]:
     try:
-        # استخراج متن از تصویر
         ocr_text = OCRProcessor().extract_text(image)
-        # تصحیح متن با مدل زبانی
         corrected_text = TextCorrector().correct(ocr_text)
         return ocr_text, corrected_text
     except Exception as e:
         error_msg = f"خطا: {str(e)}"

 class TextCorrector:
     def __init__(self):
+        model_name = "HooshvareLab/mt5-small-fa"
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(model_name)
             self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
             return text
         try:
+            prompt = "تصحیح نگارشی متن: " + text
             inputs = self.tokenizer(
+                prompt,
                 return_tensors="pt",
                 max_length=512,
                 truncation=True
             )
             outputs = self.model.generate(
                 **inputs,
                 max_length=512,
                 num_beams=5,
                 early_stopping=True
             )
             return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         except Exception as e:
             print(f"خطا در تصحیح متن: {e}")
 def full_processing(image: np.ndarray) -> Tuple[str, str]:
     try:
         ocr_text = OCRProcessor().extract_text(image)
         corrected_text = TextCorrector().correct(ocr_text)
         return ocr_text, corrected_text
     except Exception as e:
         error_msg = f"خطا: {str(e)}"