"""Gradio demo that corrects Arabic text with SuperSl6/Arabic-Text-Correction."""

import re

import gradio as gr
from transformers import AutoTokenizer, pipeline

# Load the tokenizer with use_fast=False (the non-fast tokenizer implementation).
tokenizer = AutoTokenizer.from_pretrained(
    "SuperSl6/Arabic-Text-Correction",
    use_fast=False,
)

model = pipeline(
    "text2text-generation",
    model="SuperSl6/Arabic-Text-Correction",
    tokenizer=tokenizer,
)

# First contiguous run of Arabic-block characters (U+0600-U+06FF), allowing
# whitespace between words so a multi-word correction is kept whole instead
# of being cut off after its first word.
_ARABIC_RUN = re.compile(r'[\u0600-\u06FF]+(?:\s+[\u0600-\u06FF]+)*')


def _extract_arabic(text: str) -> str:
    """Return the first run of Arabic words in *text*, or *text* if none."""
    found = _ARABIC_RUN.search(text)
    return found.group(0) if found else text


def correct_text(input_text: str) -> str:
    """Run the correction model on *input_text* and return the Arabic result.

    Args:
        input_text: Text typed by the user in the Gradio textbox.

    Returns:
        The first run of whitespace-separated Arabic words found in the
        model output, the raw model output when it contains no characters
        in U+0600-U+06FF, or an empty string when the input is empty or
        whitespace-only.
    """
    # The interface is live, so this callback also fires when the textbox is
    # cleared; skip generation when there is nothing to correct.
    if not input_text or not input_text.strip():
        return ""

    generations = model(
        input_text,
        max_length=50,
        no_repeat_ngram_size=2,
        repetition_penalty=1.5,
        num_return_sequences=1,
    )
    generated = generations[0]['generated_text']

    # Keep only the leading Arabic portion of the generated text.
    return _extract_arabic(generated)


# Gradio Interface
interface = gr.Interface(
    fn=correct_text,
    inputs=gr.Textbox(lines=3, placeholder="أدخل النص العربي هنا..."),
    outputs=gr.Textbox(),
    live=True,
    title="تصحيح النص العربي",
    description="أداة لتصحيح النصوص العربية باستخدام نموذج SuperSl6/Arabic-Text-Correction.",
)

interface.launch()