Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

OmidSakaki committed on Jul 2

Commit

f5ea811

verified ·

1 Parent(s): a3b6ec8

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -23

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import easyocr
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 import numpy as np
 from typing import Tuple
@@ -26,13 +26,14 @@ class TextPostProcessor:
             '۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9'
         }
         try:
             self.llm = pipeline(
-                "text-generation",
-                model="HooshvareLab/gpt2-fa",
-                tokenizer="HooshvareLab/gpt2-fa"
             )
         except Exception as e:
-            print("خطا در بارگذاری مدل زبانی:", e)
             self.llm = None
     def preprocess(self, text: str) -> str:
@@ -44,30 +45,19 @@ class TextPostProcessor:
         return " ".join(text.split())
     def enhance_with_llm(self, text: str) -> str:
-        """بازنویسی یا بهبود متن با LLM فارسی"""
         if not text or not self.llm:
             return text
-        prompt = f"متن زیر را بازنویسی کن و به صورت روان و صحیح برگردان:\n{text}\nبازنویسی:"
         try:
-            output = self.llm(
-                prompt,
-                max_length=len(prompt) + len(text) + 60,
-                num_return_sequences=1,
-                do_sample=True,
-                temperature=0.9,
-                pad_token_id=0,
-                eos_token_id=2
-            )
-            gen_text = output[0]['generated_text']
-            # فقط بخش بازنویسی شده را جدا کن
-            if "بازنویسی:" in gen_text:
-                gen_text = gen_text.split("بازنویسی:")[-1].strip()
             # اگر بازنویسی مدل بی‌معنا یا کوتاه بود، همان متن را برگردان
-            if len(gen_text) < 8:
                 return text
-            return gen_text
         except Exception as e:
-            print("خطا در بازنویسی با LLM:", e)
             return text
 ## 2. پایپلاین اصلی

 import gradio as gr
 import easyocr
+from transformers import pipeline
 import numpy as np
 from typing import Tuple
             '۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9'
         }
         try:
+            # استفاده از مدل instruction-tuned مناسب بازنویسی
             self.llm = pipeline(
+                "text2text-generation",
+                model="ParsiAI/gpt2-medium-fa-instruction",
+                tokenizer="ParsiAI/gpt2-medium-fa-instruction"
             )
         except Exception as e:
+            print("خطا در بارگذاری مدل بازنویسی:", e)
             self.llm = None
     def preprocess(self, text: str) -> str:
         return " ".join(text.split())
     def enhance_with_llm(self, text: str) -> str:
+        """بازنویسی متن با مدل instruction-tuned"""
         if not text or not self.llm:
             return text
+        prompt = f"متن زیر را بازنویسی کن:\n{text}"
         try:
+            result = self.llm(prompt, max_length=256, num_return_sequences=1)
+            enhanced_text = result[0]['generated_text'].strip()
             # اگر بازنویسی مدل بی‌معنا یا کوتاه بود، همان متن را برگردان
+            if len(enhanced_text) < 8:
                 return text
+            return enhanced_text
         except Exception as e:
+            print("خطا در بازنویسی:", e)
             return text
 ## 2. پایپلاین اصلی