OmidSakaki committed on
Commit
2bf547d
·
verified ·
1 Parent(s): 999a6b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -42
app.py CHANGED
@@ -1,57 +1,112 @@
1
  import gradio as gr
2
- from paddleocr import PaddleOCR
3
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  from PIL import Image
5
- import os
 
 
 
6
 
7
- # --- مدل‌ها ---
8
# Checkpoint shared by the tokenizer and the seq2seq model below.
model_name = "m3hrdadfi/mt5-small-finetuned-grammar-synthesis"

# Build the OCR engine and the text model at import time; any failure is
# re-raised as a Gradio error that carries the original message.
try:
    ocr_model = PaddleOCR(use_textline_orientation=True, lang='fa')
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    nlp_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
except Exception as exc:
    raise gr.Error(f"خطا در بارگذاری مدل‌ها: لطفاً این خطا را به توسعه دهنده گزارش دهید. خطا: {str(exc)}")
17
 
18
- # --- توابع پردازش ---
19
def run_ocr(image):
    """Extract text from an image file with PaddleOCR.

    Args:
        image: Path of the image file (a ``str`` or path-like object), or an
            object that exposes the path as ``.name``. ``None`` is accepted
            and yields an empty string.

    Returns:
        The recognised text fragments joined by single spaces, or ``""``
        when nothing was recognised.

    The image file is deleted after OCR has run, including when OCR raises.
    """
    if image is None:
        return ""

    # The UI's gr.Image(type="filepath") hands over a plain path string, which
    # has no ``.name``; only fall back to ``.name`` for file-like wrappers.
    if isinstance(image, (str, os.PathLike)):
        image_path = image
    else:
        image_path = image.name

    try:
        result = ocr_model.ocr(image_path, cls=True)
    finally:
        # Clean up even if the OCR call fails so files do not accumulate.
        os.remove(image_path)

    # NOTE(review): the first entry appears to be None (not an empty list)
    # when no text is detected, so guard against both before iterating.
    lines = result[0] if result else None
    if not lines:
        return ""
    return " ".join(line[1][0] for line in lines)
25
 
26
def postprocess_text(text):
    """Feed ``text`` through the seq2seq model and return the decoded result.

    The input is tokenized with truncation at 512 tokens, and the first
    generated sequence is decoded with special tokens stripped.
    """
    encoded = tokenizer(text, truncation=True, max_length=512, return_tensors="pt")
    generated = nlp_model.generate(**encoded)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- # --- رابط کاربری ---
32
with gr.Blocks() as app:
    gr.Markdown("## سیستم OCR فارسی با پردازش پیشرفته متن")

    with gr.Row():
        # Left column: image upload and the trigger button.
        with gr.Column():
            image_input = gr.Image(type="filepath", label="تصویر ورودی")
            process_btn = gr.Button("پردازش تصویر")

        # Right column: raw OCR text and the post-processed text.
        with gr.Column():
            raw_output = gr.Textbox(label="متن استخراج شده")
            processed_output = gr.Textbox(label="متن پردازش شده")

    def process_image(img):
        """Run OCR once and return ``(raw text, post-processed text)``."""
        extracted = run_ocr(img)
        return extracted, postprocess_text(extracted)

    # The output order must match the tuple returned by process_image.
    process_btn.click(
        process_image,
        inputs=image_input,
        outputs=[raw_output, processed_output],
    )


if __name__ == "__main__":
    app.launch()
 
1
  import gradio as gr
2
+ import time
 
3
  from PIL import Image
4
+ from paddleocr import PaddleOCR
5
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
6
+ import pytesseract
7
+ import numpy as np
8
 
9
# Instantiate the OCR backends once at import time; the run_* functions
# below use these module-level objects.
paddle_ocr = PaddleOCR(use_textline_orientation=True, lang='fa')

# The TrOCR processor and model are loaded from the same checkpoint.
_TROCR_CHECKPOINT = "microsoft/trocr-base-printed"
trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
 
 
 
 
 
 
13
 
14
def run_paddleocr(image):
    """Run PaddleOCR on a PIL image and return the recognised text.

    Args:
        image: PIL image. It is written to a temporary JPEG file because the
            engine is invoked with a file path.

    Returns:
        The recognised text fragments joined by single spaces, or ``''``
        when nothing was recognised.
    """
    import os
    import tempfile

    # A private temporary directory gives every call its own file, so
    # concurrent requests cannot overwrite each other's input, and the file
    # is removed on exit instead of lingering as a shared "temp.jpg".
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, "temp.jpg")
        image.save(image_path)
        # NOTE(review): ``cls=True`` is passed together with the
        # ``use_textline_orientation`` constructor option; confirm the
        # installed PaddleOCR release accepts both.
        result = paddle_ocr.ocr(image_path, cls=True)

    # NOTE(review): the first entry appears to be None (not an empty list)
    # when no text is detected, so guard against both before iterating.
    lines = result[0] if result else None
    if not lines:
        return ''
    return ' '.join(line[1][0] for line in lines)
21
 
22
def run_trocr(image):
    """Recognise text in ``image`` with the TrOCR processor/model pair.

    Returns the first decoded sequence with special tokens stripped.
    """
    encoded = trocr_processor(image, return_tensors="pt")
    token_ids = trocr_model.generate(encoded.pixel_values)
    decoded = trocr_processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]
27
+
28
def run_tesseract(image):
    """Return the text Tesseract extracts from ``image`` with ``lang='fas'``."""
    extracted = pytesseract.image_to_string(image, lang='fas')
    return extracted
31
+
32
def _timed_ocr(engine, image):
    """Run one OCR callable on ``image`` and return ``(text, elapsed_seconds)``."""
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments.
    started = time.perf_counter()
    text = engine(image)
    return text, time.perf_counter() - started


def _comparison_table(rows):
    """Render ``(model name, text, seconds)`` rows as an HTML table string."""
    import html

    # OCR output is arbitrary text: escape it so characters such as "<" or
    # "&" are displayed literally instead of being parsed as markup.
    body = "".join(
        f"""
        <tr>
            <td>{html.escape(name)}</td>
            <td>{html.escape(text)}</td>
            <td>{seconds:.2f}</td>
        </tr>"""
        for name, text, seconds in rows
    )
    return f"""
    <table>
        <tr>
            <th>مدل</th>
            <th>متن استخراج شده</th>
            <th>زمان پردازش (ثانیه)</th>
        </tr>{body}
    </table>
    """


def compare_models(image):
    """Run PaddleOCR, TrOCR and Tesseract on one image and compare them.

    Args:
        image: PIL image or NumPy array supplied by the image input.

    Returns:
        A 4-tuple ``(comparison_html, paddle_text, trocr_text,
        tesseract_text)`` where ``comparison_html`` is a table listing each
        model's text and processing time in seconds.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Without this guard, clicking the button with no upload fails on
        # ``None.convert`` with an unhelpful AttributeError.
        raise gr.Error("لطفاً ابتدا یک تصویر بارگذاری کنید.")

    # Normalise the input to an RGB PIL image before handing it to the engines.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    engines = (
        ("PaddleOCR", run_paddleocr),
        ("TrOCR", run_trocr),
        ("Tesseract", run_tesseract),
    )
    rows = [(name, *_timed_ocr(engine, image)) for name, engine in engines]

    # Same order as before: table first, then one text per engine.
    return (_comparison_table(rows), *(text for _, text, _ in rows))
83
 
84
+ # Create Gradio interface
85
with gr.Blocks(title="مقایسه مدل‌های OCR فارسی") as demo:
    # Introductory text shown above the controls.
    gr.Markdown("""
    ## مقایسه عملکرد مدل‌های OCR برای زبان فارسی
    این برنامه سه مدل مختلف OCR را روی تصاویر فارسی مقایسه می‌کند:
    1. PaddleOCR
    2. TrOCR (مایکروسافت)
    3. Tesseract OCR
    """)

    with gr.Row():
        # Left column: image upload and the trigger button.
        with gr.Column():
            image_input = gr.Image(type="pil", label="تصویر ورودی")
            submit_btn = gr.Button("مقایسه مدل‌ها")

        # Right column: the comparison table followed by each model's text.
        with gr.Column():
            comparison_output = gr.HTML(label="نتایج مقایسه")
            paddle_output = gr.Textbox(label="PaddleOCR")
            trocr_output = gr.Textbox(label="TrOCR")
            tesseract_output = gr.Textbox(label="Tesseract")

    # The output order must match the tuple returned by compare_models.
    _result_widgets = [
        comparison_output,
        paddle_output,
        trocr_output,
        tesseract_output,
    ]
    submit_btn.click(compare_models, inputs=image_input, outputs=_result_widgets)


if __name__ == "__main__":
    demo.launch()