from transformers import pipeline, AutoTokenizer
import gradio as gr

# Hub identifier shared by the tokenizer and the generation pipeline.
MODEL_ID = "SuperSl6/Arabic-Text-Correction"

# use_fast=False asks for the slow (Python) tokenizer implementation rather
# than the fast Rust-backed one.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)

# Text-to-text generation pipeline that reuses the tokenizer loaded above.
model = pipeline("text2text-generation", model=MODEL_ID, tokenizer=tokenizer)

def correct_text(input_text):
    """Return the model's corrected version of the given text.

    Args:
        input_text: Text to correct. ``None`` and empty or whitespace-only
            strings are answered with ``""`` without running the model.

    Returns:
        The ``generated_text`` of the first (and only requested) sequence
        produced by the text2text pipeline, or ``""`` for blank input.
    """
    # The UI may invoke this with an empty or cleared textbox; there is
    # nothing to correct, so skip inference instead of feeding the generator
    # a blank prompt.
    if input_text is None:
        return ""
    if isinstance(input_text, str) and not input_text.strip():
        return ""

    outputs = model(
        input_text,
        max_length=50,           # Upper bound on the generated sequence length
        no_repeat_ngram_size=2,  # Never emit the same bigram twice
        repetition_penalty=1.5,  # Discourage repeated tokens
        num_return_sequences=1,  # Ask for a single candidate
    )
    return outputs[0]["generated_text"]

# Gradio UI: a three-line input box feeds correct_text and the returned
# string is shown in a plain textbox.
source_box = gr.Textbox(lines=3, placeholder="أدخل النص العربي هنا...")
result_box = gr.Textbox()

interface = gr.Interface(
    title="تصحيح النص العربي",
    description="أداة لتصحيح النصوص العربية باستخدام نموذج SuperSl6/Arabic-Text-Correction.",
    fn=correct_text,
    inputs=source_box,
    outputs=result_box,
    live=True,  # re-run automatically when the input changes
)

# Start serving the app.
interface.launch()