Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

DocQA_Agent / app.py

OmidSakaki

Update app.py

c5a772e verified about 2 months ago

raw

history blame

4.09 kB

	import gradio as gr
	import easyocr
	from transformers import pipeline
	from PIL import Image
	import numpy as np
	import os
	from typing import Tuple

	## 1. Initial setup and models
	# ----------------------------------
	class OCRProcessor:
	    """Wrapper around an EasyOCR reader that extracts text from images.

	    The reader is built once in ``__init__`` and reused by every
	    ``extract_text`` call on the same instance.
	    """

	    def __init__(self, languages: Tuple[str, ...] = ("fa",)):
	        """Create the underlying ``easyocr.Reader``.

	        Args:
	            languages: EasyOCR language codes to recognise. Defaults to
	                Persian only, which is what this class used before the
	                parameter existed.
	        """
	        # EasyOCR is handed a list, as in the original hard-coded call.
	        self.reader = easyocr.Reader(list(languages))

	    def extract_text(self, image: np.ndarray) -> str:
	        """Run OCR on ``image`` and return the recognised text.

	        Args:
	            image: Image passed straight through to ``Reader.readtext``.

	        Returns:
	            The second element of every OCR result joined by single
	            spaces, or ``""`` when the reader returns nothing.

	        Raises:
	            RuntimeError: If reading or joining fails; the original
	                exception is attached as ``__cause__``.
	        """
	        try:
	            results = self.reader.readtext(image)
	            # result[1] is the text field of each OCR result
	            # (NOTE(review): per EasyOCR's documented result tuples).
	            return " ".join(result[1] for result in (results or ()))
	        except Exception as e:
	            # Chain explicitly so the root cause stays visible in tracebacks.
	            raise RuntimeError(f"خطا در پردازش OCR: {e}") from e

	class TextPostProcessor:
	    """Normalise OCR output and optionally rewrite it with a language model."""

	    def __init__(self, model_name: str = "gpt2"):
	        """Build the character map and try to load the generation model.

	        Args:
	            model_name: Model id handed to the ``text-generation``
	                pipeline. Defaults to ``"gpt2"``, the previously
	                hard-coded value.
	                NOTE(review): confirm this checkpoint is suitable for
	                Persian text.
	        """
	        # Arabic yeh/kaf -> Persian yeh/kaf, and Persian digits -> ASCII
	        # digits. Keys must stay single characters (see ``preprocess``).
	        self.replacements = {
	            'ي': 'ی', 'ك': 'ک',
	            '۰': '0', '۱': '1', '۲': '2', '۳': '3', '۴': '4',
	            '۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9',
	        }

	        # Model loading is deliberately best-effort: without a model the
	        # class still normalises text and ``enhance_with_llm`` is a no-op.
	        try:
	            self.llm = pipeline("text-generation", model=model_name)
	        except Exception:
	            self.llm = None

	    def preprocess(self, text: str) -> str:
	        """Normalise characters and collapse whitespace in ``text``.

	        Args:
	            text: Raw OCR text; may be empty or ``None``.

	        Returns:
	            ``""`` for falsy input, otherwise the text with every key of
	            ``self.replacements`` substituted and all whitespace runs
	            reduced to single spaces (leading/trailing removed).
	        """
	        if not text:
	            return ""

	        # One translate pass replaces the chain of per-character
	        # ``str.replace`` calls; the table is rebuilt per call so edits
	        # to ``self.replacements`` are honoured.
	        normalized = text.translate(str.maketrans(self.replacements))
	        return " ".join(normalized.split())

	    def enhance_with_llm(self, text: str) -> str:
	        """Ask the language model to rewrite ``text``.

	        Args:
	            text: Cleaned text to improve.

	        Returns:
	            The model's continuation with surrounding whitespace
	            stripped. ``text`` is returned unchanged when it is empty,
	            when no model was loaded, when generation fails, or when the
	            model produces only whitespace.
	        """
	        if not text or not self.llm:
	            return text

	        prompt = f"اصلاح و بازنویسی متن فارسی زیر:\n{text}\n\nمتن بهبود یافته:"
	        try:
	            outputs = self.llm(
	                prompt,
	                # ``max_length`` would also count the prompt's tokens;
	                # ``max_new_tokens`` bounds only the generated part.
	                max_new_tokens=200,
	                num_return_sequences=1,
	                # Without this the result echoes the whole prompt back.
	                return_full_text=False,
	            )
	            generated = outputs[0]['generated_text'].strip()
	        except Exception:
	            # Best-effort enhancement: fall back to the cleaned text.
	            return text
	        return generated or text

	## 2. Main processing
	# ----------------------------------
	# Lazily-built processor instances shared by every request, so the OCR
	# reader and the text-generation pipeline are constructed only once.
	_PROCESSOR_CACHE = {}


	def _get_processors():
	    """Return the shared ``(OCRProcessor, TextPostProcessor)`` pair, creating it on first use."""
	    if "ocr" not in _PROCESSOR_CACHE:
	        _PROCESSOR_CACHE["ocr"] = OCRProcessor()
	    if "post" not in _PROCESSOR_CACHE:
	        _PROCESSOR_CACHE["post"] = TextPostProcessor()
	    return _PROCESSOR_CACHE["ocr"], _PROCESSOR_CACHE["post"]


	def full_processing(image: np.ndarray) -> Tuple[str, str]:
	    """Run the complete image pipeline: OCR, normalisation, LLM rewrite.

	    Args:
	        image: Input image, or ``None`` when nothing was supplied.

	    Returns:
	        ``(cleaned_text, enhanced_text)`` on success. On any failure, or
	        when ``image`` is ``None``, a message starting with ``"خطا"`` is
	        returned as the first element and ``""`` as the second. This
	        function never raises.
	    """
	    # No image supplied: report it directly instead of surfacing a
	    # wrapped OCR library error.
	    if image is None:
	        return "خطا: تصویری انتخاب نشده است", ""

	    try:
	        ocr, post_processor = _get_processors()

	        # 1. Text extraction
	        ocr_text = ocr.extract_text(image)

	        # 2. Normalisation
	        cleaned_text = post_processor.preprocess(ocr_text)

	        # 3. Language-model enhancement
	        enhanced_text = post_processor.enhance_with_llm(cleaned_text)
	    except Exception as e:
	        # Top-level boundary for the UI callback: show the error text in
	        # the first output box instead of raising.
	        return f"خطا: {e}", ""

	    return cleaned_text, enhanced_text

	## 3. Gradio user interface
	# ----------------------------------
	# Build the Gradio UI: an input column (image + button) next to an output
	# column with a results tab and a preview tab.
	# NOTE(review): nesting was reconstructed from the order of the ``with``
	# statements; confirm the layout matches the deployed app.
	with gr.Blocks(title="پایپلاین OCR فارسی با LLM") as app:
	    gr.Markdown("""
	# سیستم پیشرفته پردازش متن فارسی
	استخراج متن از تصاویر + پردازش با مدل زبانی
	""")

	    with gr.Row():
	        with gr.Column():
	            img_input = gr.Image(label="تصویر ورودی", type="numpy")
	            process_btn = gr.Button("پردازش تصویر", variant="primary")

	        with gr.Column():
	            with gr.Tab("نتایج پردازش"):
	                raw_output = gr.Textbox(label="متن استخراج شده")
	                enhanced_output = gr.Textbox(label="متن بهبود یافته")

	            with gr.Tab("پیش‌نمایش"):
	                gr.Markdown("### تصویر ورودی")
	                img_preview = gr.Image(label="", interactive=False)

	    # Mirror the input image into the preview tab whenever it changes.
	    # This does not run OCR; processing only happens on button click.
	    img_input.change(
	        fn=lambda x: x,
	        inputs=img_input,
	        outputs=img_preview,
	    )

	    # Run the full OCR pipeline when the button is clicked.
	    process_btn.click(
	        fn=full_processing,
	        inputs=img_input,
	        outputs=[raw_output, enhanced_output],
	    )

	# Start the web server only when executed as a script.
	if __name__ == "__main__":
	    app.launch()