from transformers import pipeline, AutoTokenizer
import gradio as gr
import re

# One checkpoint id is shared by the tokenizer and the pipeline so the two
# cannot drift apart. The tokenizer is built explicitly (rather than letting
# the pipeline create it) so that use_fast=False can be passed to it.
MODEL_NAME = "SuperSl6/Arabic-Text-Correction"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
model = pipeline(
    task="text2text-generation",
    model=MODEL_NAME,
    tokenizer=tokenizer,
)

# A run of one or more words made of characters from the Unicode Arabic block
# (U+0600-U+06FF), separated by whitespace. Compiled once at import time
# because correct_text may be called many times by the UI.
_ARABIC_RUN_RE = re.compile(r'[\u0600-\u06FF]+(?:\s+[\u0600-\u06FF]+)*')


def correct_text(input_text):
    """Return the model's corrected version of *input_text*.

    The text is passed to the module-level ``model`` pipeline and the first
    contiguous run of Arabic words in the generated output is returned.

    Args:
        input_text: The text to correct. Empty, whitespace-only, or ``None``
            input is accepted and short-circuits without calling the model.

    Returns:
        The first whitespace-separated run of Arabic-block words found in the
        generated text; the raw generated text if it contains no such
        characters; or ``""`` when there is nothing to correct.
    """
    # Nothing to correct: skip generation entirely so a cleared input box
    # yields a cleared output instead of arbitrary model output.
    if not input_text or not input_text.strip():
        return ""

    result = model(
        input_text,
        max_length=50,
        no_repeat_ngram_size=2,
        repetition_penalty=1.5,
        num_return_sequences=1,
    )[0]['generated_text']

    # Keep the whole first Arabic phrase, not just its first word; any
    # non-Arabic, non-whitespace character ends the run.
    match = _ARABIC_RUN_RE.search(result)
    return match.group(0) if match else result

# Gradio UI: a three-line input box wired to correct_text, with the result
# shown in a plain output box.
input_box = gr.Textbox(lines=3, placeholder="أدخل النص العربي هنا...")
output_box = gr.Textbox()

interface = gr.Interface(
    fn=correct_text,
    inputs=input_box,
    outputs=output_box,
    live=True,
    title="تصحيح النص العربي",
    description="أداة لتصحيح النصوص العربية باستخدام نموذج SuperSl6/Arabic-Text-Correction.",
)

# Start the web server as soon as the script is executed.
interface.launch()