SuperSl6's picture
Update app.py
102175e verified
raw
history blame
1.14 kB
from transformers import pipeline, AutoTokenizer
import gradio as gr
import re
# Single checkpoint identifier shared by the tokenizer and the pipeline.
MODEL_ID = "SuperSl6/Arabic-Text-Correction"

# use_fast=False explicitly requests the non-"fast" tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)

# Text-to-text generation pipeline that reuses the tokenizer loaded above
# instead of letting the pipeline load its own.
model = pipeline(
    task="text2text-generation",
    model=MODEL_ID,
    tokenizer=tokenizer,
)
def correct_text(input_text):
    """Correct ``input_text`` with the module-level ``model`` pipeline.

    The generated text is scanned for the first contiguous run of characters
    in the Unicode range U+0600-U+06FF. Whitespace is outside that range, so
    the run ends at the first space: only the first Arabic word of the model
    output is returned.

    Args:
        input_text: Text to correct. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to correct".

    Returns:
        The first Arabic run found in the generated text; the raw generated
        text if it contains no character in that range; or ``""`` when
        ``input_text`` is empty/blank (the model is not invoked in that case).
    """
    # Callers such as a live-updating UI may pass an empty or cleared value;
    # skip the (expensive) generation instead of decoding from nothing.
    if not input_text or not input_text.strip():
        return ""

    result = model(
        input_text,
        max_length=50,
        no_repeat_ngram_size=2,
        repetition_penalty=1.5,
        num_return_sequences=1,
    )[0]['generated_text']

    # re.search stops at the first match, which is all that is needed here;
    # there is no reason to collect every match just to take element 0.
    first_arabic = re.search(r'[\u0600-\u06FF]+', result)
    return first_arabic.group(0) if first_arabic else result
# Gradio UI: a three-line input box wired to correct_text, with a plain
# textbox for the result.
text_input = gr.Textbox(lines=3, placeholder="أدخل النص العربي هنا...")
text_output = gr.Textbox()

# live=True is passed through unchanged from the original configuration.
interface = gr.Interface(
    fn=correct_text,
    inputs=text_input,
    outputs=text_output,
    live=True,
    title="تصحيح النص العربي",
    description="أداة لتصحيح النصوص العربية باستخدام نموذج SuperSl6/Arabic-Text-Correction.",
)

# Start the web app.
interface.launch()