Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,122 +1,90 @@
|
|
1 |
import gradio as gr
|
2 |
-
import time
|
3 |
-
import numpy as np
|
4 |
-
from PIL import Image
|
5 |
-
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
6 |
import easyocr
|
7 |
-
from
|
|
|
|
|
|
|
8 |
|
9 |
-
# Initialize
|
10 |
-
|
11 |
-
"EasyOCR": easyocr.Reader(['fa']), # تنظیم زبان فارسی
|
12 |
-
"TrOCR": {
|
13 |
-
"processor": TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed"),
|
14 |
-
"model": VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
|
15 |
-
},
|
16 |
-
"DocTR": ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
|
17 |
-
}
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
result = models["EasyOCR"].readtext(np.array(image), detail=0)
|
23 |
-
return ' '.join(result) if result else ''
|
24 |
-
except Exception as e:
|
25 |
-
return f"خطا: {str(e)}"
|
26 |
|
27 |
-
def
|
28 |
-
"""
|
29 |
try:
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
33 |
except Exception as e:
|
34 |
-
return f"
|
35 |
|
36 |
-
def
|
37 |
-
"""
|
38 |
try:
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
return ' '.join([word[0] for page in result.pages for block in page.blocks
|
43 |
-
for line in block.lines for word in line.words])
|
44 |
except Exception as e:
|
45 |
-
return f"
|
46 |
|
47 |
-
def
|
48 |
-
"""
|
49 |
-
|
50 |
-
|
51 |
-
image = image.convert("RGB")
|
52 |
-
|
53 |
-
results = {}
|
54 |
-
times = {}
|
55 |
-
|
56 |
-
# اجرای تمام مدلهای OCR
|
57 |
-
for name, func in [("EasyOCR", run_easyocr),
|
58 |
-
("TrOCR", run_trocr),
|
59 |
-
("DocTR", run_doctr)]:
|
60 |
-
start = time.time()
|
61 |
-
results[name] = func(image)
|
62 |
-
times[name] = time.time() - start
|
63 |
|
64 |
-
#
|
65 |
-
|
66 |
-
for name in results:
|
67 |
-
table_rows.append(f"""
|
68 |
-
<tr>
|
69 |
-
<td style="padding: 8px; border: 1px solid #ddd; text-align: center; font-weight: bold;">{name}</td>
|
70 |
-
<td style="padding: 8px; border: 1px solid #ddd; text-align: right; direction: rtl;">{results[name]}</td>
|
71 |
-
<td style="padding: 8px; border: 1px solid #ddd; text-align: center;">{times[name]:.3f} ثانیه</td>
|
72 |
-
</tr>
|
73 |
-
""")
|
74 |
|
75 |
-
|
76 |
-
<div style="overflow-x: auto;">
|
77 |
-
<table style="width:100%; border-collapse: collapse; margin: 15px 0; font-family: Arial, sans-serif;">
|
78 |
-
<tr style="background-color: #4CAF50; color: white;">
|
79 |
-
<th style="padding: 12px; border: 1px solid #ddd; text-align: center;">مدل</th>
|
80 |
-
<th style="padding: 12px; border: 1px solid #ddd; text-align: center;">متن استخراج شده</th>
|
81 |
-
<th style="padding: 12px; border: 1px solid #ddd; text-align: center;">زمان پردازش</th>
|
82 |
-
</tr>
|
83 |
-
{''.join(table_rows)}
|
84 |
-
</table>
|
85 |
-
</div>
|
86 |
-
"""
|
87 |
-
|
88 |
-
return comparison, results['EasyOCR'], results['TrOCR'], results['DocTR']
|
89 |
|
90 |
# رابط کاربری Gradio
|
91 |
-
with gr.Blocks(title="
|
92 |
gr.Markdown("""
|
93 |
-
|
94 |
-
|
95 |
""")
|
96 |
|
97 |
with gr.Row():
|
98 |
with gr.Column():
|
99 |
-
|
100 |
-
gr.
|
101 |
-
examples=["sample_fa1.jpg", "sample_fa2.png"],
|
102 |
-
inputs=img_input,
|
103 |
-
label="تصاویر نمونه"
|
104 |
-
)
|
105 |
-
submit_btn = gr.Button("مقایسه مدلها", variant="primary")
|
106 |
|
107 |
with gr.Column():
|
108 |
-
|
109 |
-
|
110 |
-
gr.
|
111 |
-
|
112 |
-
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
|
|
119 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
if __name__ == "__main__":
|
122 |
-
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
2 |
import easyocr
|
3 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
4 |
+
from PIL import Image
|
5 |
+
import numpy as np
|
6 |
+
import os
|
7 |
|
8 |
+
# Initialize EasyOCR for Persian
|
9 |
+
reader = easyocr.Reader(['fa'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
# Load NLP model for text correction
|
12 |
+
tokenizer = AutoTokenizer.from_pretrained("persiannlp/mt5-small-parsinlu-grammar-correction")
|
13 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("persiannlp/mt5-small-parsinlu-grammar-correction")
|
|
|
|
|
|
|
|
|
14 |
|
15 |
+
def run_ocr(image):
    """Extract text from an image with the module-level EasyOCR reader.

    Args:
        image: A PIL image, which is converted to a NumPy array first;
            any other value is handed to ``reader.readtext`` unchanged.

    Returns:
        The recognized text fragments joined by single spaces, the
        "no text found" message when nothing was recognized, or an error
        message string if any step raised.
    """
    try:
        # PIL images are converted to an array before being given to the reader.
        source = np.array(image) if isinstance(image, Image.Image) else image

        # Position 1 of each detection is used as its recognized text.
        fragments = [detection[1] for detection in reader.readtext(source)]
        if not fragments:
            return "متنی یافت نشد!"
        return " ".join(fragments)
    except Exception as e:
        # Failures are reported to the UI as text instead of propagating.
        return f"خطا در پردازش تصویر: {e!s}"
|
27 |
|
28 |
+
def postprocess_text(text):
    """Correct OCR output with the module-level seq2seq language model.

    Args:
        text: Raw text to correct. It is tokenized with truncation at
            512 tokens.

    Returns:
        The decoded model output with special tokens removed, or an error
        message string if tokenization, generation, or decoding raised.
    """
    try:
        inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
        # Without an explicit limit, generate() falls back to the default
        # generation length (20 tokens unless the checkpoint's generation
        # config overrides it), which cuts the corrected text short for
        # anything but very short inputs. Allow the output to be as long
        # as the (truncated) input.
        outputs = model.generate(**inputs, max_length=512)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Failures are reported to the UI as text instead of propagating.
        return f"خطا در پردازش متن: {str(e)}"
|
36 |
|
37 |
+
def process_image(image):
    """Run OCR on an image and, when real text was found, correct it.

    Args:
        image: The uploaded image, passed unchanged to ``run_ocr``.

    Returns:
        A ``(raw_text, processed_text)`` tuple. ``processed_text`` is the
        language-model correction of ``raw_text``; it is ``raw_text``
        itself when OCR returned nothing, the "no text found" message,
        or an OCR error message.
    """
    # Text extraction.
    raw_text = run_ocr(image)

    # run_ocr reports "nothing found" and failures as plain strings.
    # Those are status messages, not document text, so they must not be
    # sent through the grammar-correction model (which would rewrite or
    # hide the actual error shown to the user).
    is_status_message = (
        not raw_text
        or raw_text == "متنی یافت نشد!"
        or raw_text.startswith("خطا در پردازش تصویر:")
    )
    if is_status_message:
        return raw_text, raw_text

    # Text post-processing.
    return raw_text, postprocess_text(raw_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
# رابط کاربری Gradio
|
48 |
+
with gr.Blocks(title="OCR فارسی با پردازش NLP") as app:
|
49 |
gr.Markdown("""
|
50 |
+
## 🔠 OCR فارسی + پردازش متن با مدل زبانی
|
51 |
+
متن را از تصاویر استخراج کنید و با مدل زبانی اصلاح کنید!
|
52 |
""")
|
53 |
|
54 |
with gr.Row():
|
55 |
with gr.Column():
|
56 |
+
image_input = gr.Image(label="تصویر حاوی متن فارسی را آپلود کنید", type="pil")
|
57 |
+
process_btn = gr.Button("پردازش تصویر", variant="primary")
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
with gr.Column():
|
60 |
+
with gr.Tab("نتایج"):
|
61 |
+
raw_output = gr.Textbox(label="متن خام (OCR)")
|
62 |
+
processed_output = gr.Textbox(label="متن پردازششده (NLP)")
|
63 |
+
|
64 |
+
with gr.Tab("تصویر"):
|
65 |
+
image_output = gr.Image(label="تصویر ورودی")
|
66 |
+
|
67 |
+
# پردازش هنگام کلیک یا آپلود تصویر
|
68 |
+
process_btn.click(
|
69 |
+
fn=process_image,
|
70 |
+
inputs=image_input,
|
71 |
+
outputs=[raw_output, processed_output]
|
72 |
+
)
|
73 |
|
74 |
+
# نمایش خودکار تصویر ورودی
|
75 |
+
image_input.change(
|
76 |
+
fn=lambda img: img,
|
77 |
+
inputs=image_input,
|
78 |
+
outputs=image_output
|
79 |
)
|
80 |
+
|
81 |
+
gr.Markdown("---")
|
82 |
+
gr.Markdown("""
|
83 |
+
**راهنما**:
|
84 |
+
1. تصویری حاوی متن فارسی آپلود کنید
|
85 |
+
2. روی دکمه 'پردازش تصویر' کلیک کنید
|
86 |
+
3. نتایج استخراج متن و پردازش زبان طبیعی را مشاهده کنید
|
87 |
+
""")
|
88 |
|
89 |
if __name__ == "__main__":
|
90 |
+
app.launch()
|