Spaces:

SuperSl6
/

Arabic-Text-Correction

Sleeping

App Files Files Community

Arabic-Text-Correction / app.py

SuperSl6

Update app.py

05cb4d5 verified 8 months ago

raw

history blame

2.32 kB

	from transformers import pipeline, AutoTokenizer
	import gradio as gr
	import re
	import difflib

	# Checkpoint shared by the tokenizer and the generation pipeline.
	_MODEL_ID = "SuperSl6/Arabic-Text-Correction"

	# The tokenizer is loaded explicitly with use_fast=False and handed to the
	# pipeline so that the pipeline does not load its own tokenizer.
	tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, use_fast=False)
	model = pipeline(task="text2text-generation", model=_MODEL_ID, tokenizer=tokenizer)

	def extract_corrected_version(original, generated):
	    """Pull the corrected Arabic text out of the raw model output.

	    The generated text is split on ' . ' and the segment most similar to
	    ``original`` (by ``difflib.SequenceMatcher`` ratio) is selected. Runs of
	    characters in the Unicode range U+0600-U+06FF are then extracted from
	    that segment and joined with single spaces.

	    Args:
	        original: The text the user submitted.
	        generated: The raw text produced by the model.

	    Returns:
	        The space-joined Arabic words when that string occurs verbatim in
	        the selected segment. Otherwise ``original`` is returned unchanged:
	        either no Arabic words were found, or the words are separated by
	        something other than a single space (for example Latin text or
	        ASCII punctuation).
	    """
	    # The model may emit several ' . '-separated segments; keep the one
	    # closest to what the user typed.
	    segments = generated.split(' . ')
	    best_match = max(
	        segments,
	        key=lambda s: difflib.SequenceMatcher(None, original, s).ratio(),
	    )

	    corrected_words = re.findall(r'[\u0600-\u06FF]+', best_match)
	    if not corrected_words:
	        return original

	    corrected_text = ' '.join(corrected_words)
	    # A search over shorter word prefixes is unnecessary here: the full
	    # joined text is either present in the segment (return it) or it is
	    # not (fall back to the user's input).
	    return corrected_text if corrected_text in best_match else original

	def correct_text(input_text):
	    """Run the correction model on ``input_text`` and return the cleaned result.

	    Blank input (``None``, empty, or whitespace-only) is returned as-is
	    (``None`` becomes an empty string) without invoking the model. For any
	    other input the text is passed to the module-level ``model`` pipeline
	    and the first generated sequence is post-processed by
	    ``extract_corrected_version``.

	    Args:
	        input_text: The text entered by the user.

	    Returns:
	        The extracted correction, or the input itself when it is blank or
	        when no usable correction can be extracted.
	    """
	    # Blank input has nothing to correct; skipping generation also keeps the
	    # sampling model from inventing text that would be returned as a
	    # "correction".
	    if not input_text or not input_text.strip():
	        return input_text or ""

	    # do_sample=True means the output can differ between calls for the same
	    # input.
	    outputs = model(
	        input_text,
	        max_length=50,
	        no_repeat_ngram_size=2,
	        repetition_penalty=1.5,
	        num_return_sequences=1,
	        temperature=0.7,
	        top_p=0.9,
	        do_sample=True,
	    )
	    generated = outputs[0]['generated_text']

	    return extract_corrected_version(input_text, generated)

	# Gradio UI: one multi-line input box wired to correct_text and one output
	# box. live=True is passed so the interface re-runs when the input changes.
	input_box = gr.Textbox(lines=3, placeholder="أدخل النص العربي هنا...")
	output_box = gr.Textbox()

	interface = gr.Interface(
	    fn=correct_text,
	    inputs=input_box,
	    outputs=output_box,
	    live=True,
	    title="تصحيح النص العربي",
	    description="أداة لتصحيح النصوص العربية باستخدام نموذج SuperSl6/Arabic-Text-Correction.",
	)

	interface.launch()