Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

DocQA_Agent / app.py

OmidSakaki

Update app.py

24f0403 verified about 2 months ago

raw

history blame

3.54 kB

	import html
	import time

	import gradio as gr
	import numpy as np
	from PIL import Image
	from paddleocr import PaddleOCR
	from transformers import TrOCRProcessor, VisionEncoderDecoderModel

	# Initialize models
	# Both OCR engines are built once at import time; the functions below reuse
	# these module-level objects on every call.
	# NOTE(review): "trocr-base-printed" looks like an English printed-text
	# checkpoint -- confirm it is suitable for Persian input.
	_TROCR_CHECKPOINT = "microsoft/trocr-base-printed"

	paddle_ocr = PaddleOCR(
	    lang='fa',
	    use_textline_orientation=True,
	)
	trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
	trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

	def run_paddleocr(image):
	    """Run PaddleOCR on an image and return the recognized text.

	    Args:
	        image: A PIL image or a NumPy array. PIL images are converted to a
	            NumPy array before being passed to the OCR engine.

	    Returns:
	        The recognized text fragments joined with single spaces, or an
	        empty string when no text was recognized.
	    """
	    # Convert to numpy array if needed
	    if isinstance(image, Image.Image):
	        image = np.array(image)

	    # NOTE(review): the engine is constructed with `use_textline_orientation`
	    # while `cls=True` is passed here -- confirm both spellings are accepted
	    # by the installed PaddleOCR version.
	    result = paddle_ocr.ocr(image, cls=True)

	    # PaddleOCR may report "no text found" as a list whose first page is
	    # None (i.e. `[None]`). That list is truthy, so checking `result` alone
	    # is not enough: iterating `result[0]` would raise TypeError.
	    if not result or not result[0]:
	        return ''

	    # Each detected line is (box, (text, confidence)); keep only the text.
	    return ' '.join(line[1][0] for line in result[0])

	def run_trocr(image):
	    """Recognize the text in an image with the TrOCR model.

	    Args:
	        image: A PIL image or a NumPy array. Arrays are wrapped into a PIL
	            image before preprocessing.

	    Returns:
	        The first string decoded from the generated token ids.
	    """
	    # The processor is fed a PIL image, so wrap raw arrays first.
	    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image

	    encoded = trocr_processor(pil_image, return_tensors="pt")
	    token_ids = trocr_model.generate(encoded.pixel_values)
	    decoded = trocr_processor.batch_decode(token_ids, skip_special_tokens=True)
	    return decoded[0]

	def compare_models(image):
	    """Run PaddleOCR and TrOCR on one image and compare text and timing.

	    Args:
	        image: A PIL image, a NumPy array, or None when the user submitted
	            the form without providing an image.

	    Returns:
	        A 3-tuple ``(comparison_html, paddle_text, trocr_text)``:
	        an HTML table with each model's text and processing time in
	        seconds, followed by the raw text from PaddleOCR and from TrOCR.
	        When ``image`` is None, a short HTML notice and two empty strings
	        are returned instead of raising.
	    """
	    # Nothing uploaded: answer with a notice rather than crashing on
	    # `None.convert(...)`.
	    if image is None:
	        return (
	            '<p style="text-align:center">لطفاً ابتدا یک تصویر بارگذاری کنید.</p>',
	            '',
	            '',
	        )

	    # Convert to RGB if needed
	    if isinstance(image, np.ndarray):
	        image = Image.fromarray(image)
	    image = image.convert("RGB")

	    results = {}
	    times = {}

	    # Time each engine with a monotonic, high-resolution clock.
	    for name, runner in (('PaddleOCR', run_paddleocr), ('TrOCR', run_trocr)):
	        start = time.perf_counter()
	        results[name] = runner(image)
	        times[name] = time.perf_counter() - start

	    # OCR output comes from an arbitrary image, so escape it before it is
	    # embedded in markup; the plain-text outputs stay unescaped.
	    safe = {name: html.escape(text) for name, text in results.items()}

	    # Create comparison table
	    comparison = f"""
	    <table style="width:100%">
	    <tr>
	    <th style="text-align:center">مدل</th>
	    <th style="text-align:center">متن استخراج شده</th>
	    <th style="text-align:center">زمان پردازش (ثانیه)</th>
	    </tr>
	    <tr>
	    <td style="text-align:center">PaddleOCR</td>
	    <td style="text-align:right; direction:rtl">{safe['PaddleOCR']}</td>
	    <td style="text-align:center">{times['PaddleOCR']:.3f}</td>
	    </tr>
	    <tr>
	    <td style="text-align:center">TrOCR</td>
	    <td style="text-align:right; direction:rtl">{safe['TrOCR']}</td>
	    <td style="text-align:center">{times['TrOCR']:.3f}</td>
	    </tr>
	    </table>
	    """

	    return comparison, results['PaddleOCR'], results['TrOCR']

	# Create Gradio interface
	# Layout: one row with two columns -- the first holds the image input and
	# the submit button, the second holds the three result widgets.
	with gr.Blocks(title="مقایسه مدل‌های OCR فارسی") as demo:
	    gr.Markdown(
	        value="""
	    ## مقایسه عملکرد مدل‌های OCR برای زبان فارسی
	    این برنامه دو مدل مختلف OCR را روی تصاویر فارسی مقایسه می‌کند:
	    1. PaddleOCR
	    2. TrOCR (مایکروسافت)
	    """
	    )

	    with gr.Row():
	        # Input side
	        with gr.Column():
	            image_input = gr.Image(type="pil", label="تصویر ورودی")
	            submit_btn = gr.Button(value="مقایسه مدل‌ها", variant="primary")

	        # Output side
	        with gr.Column():
	            comparison_output = gr.HTML(label="نتایج مقایسه")
	            paddle_output = gr.Textbox(label="PaddleOCR")
	            trocr_output = gr.Textbox(label="TrOCR")

	    # The three outputs line up with the tuple returned by compare_models.
	    submit_btn.click(
	        fn=compare_models,
	        inputs=image_input,
	        outputs=[
	            comparison_output,
	            paddle_output,
	            trocr_output,
	        ],
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()