# Spaces status: Sleeping
import re
from typing import Optional

import gradio as gr
from transformers import pipeline, AutoTokenizer
# Load the checkpoint's tokenizer, explicitly opting out of the "fast"
# tokenizer implementation (use_fast=False).
tokenizer = AutoTokenizer.from_pretrained(
    "SuperSl6/Arabic-Text-Correction",
    use_fast=False,
)

# Text-to-text generation pipeline for the same checkpoint, reusing the
# tokenizer instance created above instead of letting the pipeline load one.
model = pipeline(
    task="text2text-generation",
    model="SuperSl6/Arabic-Text-Correction",
    tokenizer=tokenizer,
)
def correct_text(input_text: Optional[str]) -> str:
    """Return the model's correction for *input_text*.

    The text is passed through the module-level ``model`` pipeline and the
    first contiguous run of Arabic-block characters (U+0600-U+06FF) found in
    the generated text is returned. If the generated text contains no such
    characters, it is returned unchanged.

    Args:
        input_text: Text typed by the user. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to correct".

    Returns:
        The extracted correction, or ``""`` when there is no input.
    """
    # The interface runs in live mode, so this function is also invoked when
    # the box is empty or cleared. Skip the model call in that case instead
    # of displaying whatever the model generates for blank input.
    if input_text is None or not input_text.strip():
        return ""

    generated = model(
        input_text,
        max_length=50,
        no_repeat_ngram_size=2,
        repetition_penalty=1.5,
        num_return_sequences=1,
    )[0]['generated_text']

    # Keep only the first run of Arabic-block characters. Whitespace is not in
    # the character class, so the run ends at the first space.
    # NOTE(review): for multi-word output this returns only the first word;
    # confirm that is the intended behaviour for this model.
    first_run = re.search(r'[\u0600-\u06FF]+', generated)
    return first_run.group(0) if first_run else generated
# Gradio UI: a three-line text box feeds correct_text and a plain text box
# shows its return value. live=True is set so the function is re-run as the
# input changes rather than only on an explicit submit.
interface = gr.Interface(
    title="تصحيح النص العربي",
    description="أداة لتصحيح النصوص العربية باستخدام نموذج SuperSl6/Arabic-Text-Correction.",
    fn=correct_text,
    inputs=gr.Textbox(lines=3, placeholder="أدخل النص العربي هنا..."),
    outputs=gr.Textbox(),
    live=True,
)

# Start serving the app.
interface.launch()