"""Gradio demo that corrects Arabic text with SuperSl6/Arabic-Text-Correction."""

import difflib

import gradio as gr
from transformers import AutoTokenizer, pipeline

MODEL_NAME = "SuperSl6/Arabic-Text-Correction"

# Separator used to split the generated text into candidate sentences.
# NOTE(review): reproduced from the original literal, which contained a raw
# line break after " . " -- confirm it matches what the model actually emits.
SENTENCE_SEPARATOR = " . \n"

# Load tokenizer (the slow implementation is requested explicitly).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
model = pipeline(
    "text2text-generation",
    model=MODEL_NAME,
    tokenizer=tokenizer,
)


def align_and_preserve(original: str, corrected: str) -> str:
    """Merge *corrected* with *original* so no original content is lost.

    Both strings are split on whitespace and aligned word-by-word with
    ``difflib.SequenceMatcher``. The merged result is built per opcode:

    * ``equal`` / ``insert``: the corrected words are used.
    * ``replace``: the corrected words are used; if the original span is
      longer than the corrected one, the surplus original words are kept
      after them (they are treated as dropped, e.g. by truncated generation).
    * ``delete``: the original words are kept, since the model dropped them.

    Repeated words are kept wherever they occur; nothing is de-duplicated.

    Args:
        original: The user's input text.
        corrected: The candidate correction to align against it.

    Returns:
        The merged words joined by single spaces ("" when both are empty).
    """
    original_words = original.split()
    corrected_words = corrected.split()
    matcher = difflib.SequenceMatcher(None, original_words, corrected_words)

    merged = []
    for tag, a0, a1, b0, b1 in matcher.get_opcodes():
        if tag == "delete":
            merged.extend(original_words[a0:a1])
            continue
        # equal / insert / replace all take the corrected side.
        merged.extend(corrected_words[b0:b1])
        if tag == "replace":
            # Pair words positionally; original words with no counterpart
            # in the corrected span are restored rather than discarded.
            merged.extend(original_words[a0 + (b1 - b0):a1])
    return " ".join(merged)


def extract_corrected_version(original: str, generated: str) -> str:
    """Pick the generated sentence closest to *original* and align it.

    The generated text is split on ``SENTENCE_SEPARATOR``; the piece with the
    highest character-level ``SequenceMatcher`` ratio against *original* is
    stripped and merged with the original via ``align_and_preserve``.

    Args:
        original: The user's input text.
        generated: Raw text produced by the model.

    Returns:
        The aligned, corrected text.
    """
    sentences = generated.split(SENTENCE_SEPARATOR)
    best_match = max(
        sentences,
        key=lambda s: difflib.SequenceMatcher(None, original, s).ratio(),
    )
    return align_and_preserve(original, best_match.strip())


def correct_text(input_text: str) -> str:
    """Run the correction model on *input_text* and return the cleaned result.

    Empty or whitespace-only input returns "" without calling the model.

    Args:
        input_text: Arabic text entered by the user.

    Returns:
        The corrected text after sentence selection and word alignment.
    """
    if not input_text or not input_text.strip():
        return ""

    result = model(
        input_text,
        max_length=50,
        no_repeat_ngram_size=2,
        repetition_penalty=1.5,
        num_return_sequences=1,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )[0]["generated_text"]
    return extract_corrected_version(input_text, result)


# Gradio Interface
examples = [
    ["اكيد ان لحكام العرب والمسلمين مسؤولية يتمثل ادناها في استدعاء السفراء في الصين للتشاور"],
    ["هزا النص يحتوي على الكثير من الاخطاء الاملائية"],
    ["هليكم السلام ورحمة الله وبركاته"],
    ["انشاء الله سيكون كل شيء بخير"],
]

interface = gr.Interface(
    fn=correct_text,
    inputs=gr.Textbox(
        lines=4,
        placeholder="✍️ أدخل النص العربي هنا لتصحيحه...",
        label="📥 النص المدخل",
    ),
    outputs=gr.Textbox(label="✅ النص المصحح"),
    title="🚀 تصحيح النص العربي باستخدام SuperSl6/Arabic-Text-Correction",
    description="📝 أداة ذكية لتصحيح النصوص العربية باستخدام تقنيات الذكاء الاصطناعي. أدخل النص وسيتم تصحيحه في الوقت الفعلي!",
    theme="compact",
    examples=examples,
    allow_flagging="never",
)

interface.launch()