OmidSakaki committed on
Commit
24f0403
·
verified ·
1 Parent(s): e96a8df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -38
app.py CHANGED
@@ -1,10 +1,9 @@
1
  import gradio as gr
2
  import time
 
3
  from PIL import Image
4
  from paddleocr import PaddleOCR
5
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
6
- import pytesseract
7
- import numpy as np
8
 
9
  # Initialize models
10
  paddle_ocr = PaddleOCR(lang='fa', use_textline_orientation=True)
@@ -13,99 +12,90 @@ trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-pr
13
 
14
  def run_paddleocr(image):
15
  """Run PaddleOCR on image"""
16
- image_path = "temp.jpg"
17
- image.save(image_path)
18
- result = paddle_ocr.ocr(image_path, cls=True)
 
 
19
  text = ' '.join([line[1][0] for line in result[0]]) if result else ''
20
  return text
21
 
22
  def run_trocr(image):
23
  """Run TrOCR on image"""
 
 
 
 
24
  pixel_values = trocr_processor(image, return_tensors="pt").pixel_values
25
  generated_ids = trocr_model.generate(pixel_values)
26
  return trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
27
 
28
- def run_tesseract(image):
29
- """Run Tesseract OCR on image"""
30
- return pytesseract.image_to_string(image, lang='fas')
31
-
32
  def compare_models(image):
33
- """Compare all three OCR models"""
34
  # Convert to RGB if needed
35
  if isinstance(image, np.ndarray):
36
  image = Image.fromarray(image)
37
  image = image.convert("RGB")
38
 
39
  results = {}
 
40
 
41
  # Run PaddleOCR
42
  start = time.time()
43
  results['PaddleOCR'] = run_paddleocr(image)
44
- paddle_time = time.time() - start
45
 
46
  # Run TrOCR
47
  start = time.time()
48
  results['TrOCR'] = run_trocr(image)
49
- trocr_time = time.time() - start
50
-
51
- # Run Tesseract
52
- start = time.time()
53
- results['Tesseract'] = run_tesseract(image)
54
- tesseract_time = time.time() - start
55
 
56
  # Create comparison table
57
  comparison = f"""
58
- <table>
59
- <tr>
60
- <th>مدل</th>
61
- <th>متن استخراج شده</th>
62
- <th>زمان پردازش (ثانیه)</th>
63
- </tr>
64
  <tr>
65
- <td>PaddleOCR</td>
66
- <td>{results['PaddleOCR']}</td>
67
- <td>{paddle_time:.2f}</td>
68
  </tr>
69
  <tr>
70
- <td>TrOCR</td>
71
- <td>{results['TrOCR']}</td>
72
- <td>{trocr_time:.2f}</td>
73
  </tr>
74
  <tr>
75
- <td>Tesseract</td>
76
- <td>{results['Tesseract']}</td>
77
- <td>{tesseract_time:.2f}</td>
78
  </tr>
79
  </table>
80
  """
81
 
82
- return comparison, results['PaddleOCR'], results['TrOCR'], results['Tesseract']
83
 
84
  # Create Gradio interface
85
  with gr.Blocks(title="مقایسه مدل‌های OCR فارسی") as demo:
86
  gr.Markdown("""
87
  ## مقایسه عملکرد مدل‌های OCR برای زبان فارسی
88
- این برنامه سه مدل مختلف OCR را روی تصاویر فارسی مقایسه می‌کند:
89
  1. PaddleOCR
90
  2. TrOCR (مایکروسافت)
91
- 3. Tesseract OCR
92
  """)
93
 
94
  with gr.Row():
95
  with gr.Column():
96
  image_input = gr.Image(label="تصویر ورودی", type="pil")
97
- submit_btn = gr.Button("مقایسه مدل‌ها")
98
 
99
  with gr.Column():
100
  comparison_output = gr.HTML(label="نتایج مقایسه")
101
  paddle_output = gr.Textbox(label="PaddleOCR")
102
  trocr_output = gr.Textbox(label="TrOCR")
103
- tesseract_output = gr.Textbox(label="Tesseract")
104
 
105
  submit_btn.click(
106
  fn=compare_models,
107
  inputs=image_input,
108
- outputs=[comparison_output, paddle_output, trocr_output, tesseract_output]
109
  )
110
 
111
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import time
3
+ import numpy as np
4
  from PIL import Image
5
  from paddleocr import PaddleOCR
6
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 
 
7
 
8
  # Initialize models
9
  paddle_ocr = PaddleOCR(lang='fa', use_textline_orientation=True)
 
12
 
def run_paddleocr(image):
    """Run PaddleOCR on an image and return the recognized text.

    Args:
        image: A PIL image or a NumPy array. A PIL image is converted to a
            NumPy array before it is handed to the OCR engine.

    Returns:
        The text of every recognized line joined with single spaces, or an
        empty string when nothing was recognized.
    """
    # Convert to numpy array if needed
    if isinstance(image, Image.Image):
        image = np.array(image)

    # NOTE(review): `cls=True` is the older call signature, while the engine
    # is constructed with the newer `use_textline_orientation` option --
    # confirm the pinned paddleocr version accepts both.
    result = paddle_ocr.ocr(image, cls=True)

    # An image without any detected text can come back as `[None]`, which is
    # a truthy list; checking only `result` would then iterate over None.
    if not result or not result[0]:
        return ''

    # Each entry is indexed as line[1][0] to pull out the recognized string.
    return ' '.join(line[1][0] for line in result[0])
22
 
def run_trocr(image):
    """Run TrOCR on an image and return the decoded text.

    Args:
        image: A PIL image or a NumPy array. Arrays are wrapped in a PIL
            image first.

    Returns:
        The first decoded sequence produced by the model, with special
        tokens stripped.
    """
    # Convert to PIL Image if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # Normalise the colour mode so grayscale / RGBA inputs behave the same as
    # the RGB images passed in by compare_models.
    # NOTE(review): assumes the processor wants 3-channel RGB -- confirm.
    image = image.convert("RGB")

    pixel_values = trocr_processor(image, return_tensors="pt").pixel_values
    generated_ids = trocr_model.generate(pixel_values)
    decoded = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]
32
 
 
 
 
 
def compare_models(image):
    """Compare PaddleOCR and TrOCR models on the same image.

    Args:
        image: A PIL image or a NumPy array coming from the image input, or
            ``None`` when the handler is triggered without an image.

    Returns:
        A 3-tuple ``(comparison_html, paddle_text, trocr_text)``: an HTML
        table with each model's text and processing time in seconds,
        followed by the raw text of each model. All three are empty strings
        when ``image`` is ``None``.
    """
    # Local import keeps this block self-contained; only needed for the table.
    from html import escape

    # Nothing to process: return empty outputs instead of failing on
    # `None.convert(...)`.
    if image is None:
        return "", "", ""

    # Convert to RGB if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    results = {}
    times = {}

    # Run each model in turn; perf_counter is monotonic, so the measured
    # interval cannot be skewed by system clock adjustments.
    for model_name, runner in (('PaddleOCR', run_paddleocr), ('TrOCR', run_trocr)):
        start = time.perf_counter()
        results[model_name] = runner(image)
        times[model_name] = time.perf_counter() - start

    # OCR output is arbitrary text; escape it so characters such as `<` or
    # `&` are displayed literally instead of being parsed as markup.
    paddle_cell = escape(results['PaddleOCR'])
    trocr_cell = escape(results['TrOCR'])

    # Create comparison table
    comparison = f"""
    <table style="width:100%">
        <tr>
            <th style="text-align:center">مدل</th>
            <th style="text-align:center">متن استخراج شده</th>
            <th style="text-align:center">زمان پردازش (ثانیه)</th>
        </tr>
        <tr>
            <td style="text-align:center">PaddleOCR</td>
            <td style="text-align:right; direction:rtl">{paddle_cell}</td>
            <td style="text-align:center">{times['PaddleOCR']:.3f}</td>
        </tr>
        <tr>
            <td style="text-align:center">TrOCR</td>
            <td style="text-align:right; direction:rtl">{trocr_cell}</td>
            <td style="text-align:center">{times['TrOCR']:.3f}</td>
        </tr>
    </table>
    """

    # The text boxes receive the raw (unescaped) text.
    return comparison, results['PaddleOCR'], results['TrOCR']
75
 
# Build the Gradio UI: an image input with a submit button on the left, the
# comparison table and per-model text boxes on the right.
with gr.Blocks(title="مقایسه مدل‌های OCR فارسی") as demo:
    # Introductory text shown at the top of the page.
    gr.Markdown("""
    ## مقایسه عملکرد مدل‌های OCR برای زبان فارسی
    این برنامه دو مدل مختلف OCR را روی تصاویر فارسی مقایسه می‌کند:
    1. PaddleOCR
    2. TrOCR (مایکروسافت)
    """)

    with gr.Row():
        # Left column: input image and the trigger button.
        with gr.Column():
            image_input = gr.Image(label="تصویر ورودی", type="pil")
            submit_btn = gr.Button("مقایسه مدل‌ها", variant="primary")

        # Right column: comparison table plus the text of each model.
        with gr.Column():
            comparison_output = gr.HTML(label="نتایج مقایسه")
            paddle_output = gr.Textbox(label="PaddleOCR")
            trocr_output = gr.Textbox(label="TrOCR")

    # Order must match the tuple returned by compare_models.
    result_components = [comparison_output, paddle_output, trocr_output]
    submit_btn.click(
        fn=compare_models,
        inputs=image_input,
        outputs=result_components,
    )
100
 
101
  if __name__ == "__main__":