Spaces:
Sleeping
Sleeping
File size: 1,136 Bytes
227b2b4 d1b79e0 102175e d1b79e0 227b2b4 a827e42 d1b79e0 a827e42 102175e a827e42 102175e d1b79e0 227b2b4 d1b79e0 a827e42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
from transformers import pipeline, AutoTokenizer
import gradio as gr
import re
# Single checkpoint identifier shared by the tokenizer and the pipeline.
_MODEL_ID = "SuperSl6/Arabic-Text-Correction"

# The tokenizer is created explicitly with use_fast=False and then handed to
# the pipeline, instead of letting the pipeline build its own.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, use_fast=False)

# Text-to-text generation pipeline used by correct_text().
model = pipeline(
    "text2text-generation",
    model=_MODEL_ID,
    tokenizer=tokenizer,
)
def correct_text(input_text):
    """Run the correction model and return the first Arabic word it produced.

    Args:
        input_text: Text entered by the user.

    Returns:
        The first run of characters in the range U+0600-U+06FF found in the
        generated text, or the whole generated text when it contains no such
        characters. Empty, whitespace-only or ``None`` input returns ``""``
        without calling the model.
    """
    # Nothing to correct: skip the model call entirely for blank input.
    if not input_text or not input_text.strip():
        return ""

    result = model(
        input_text,
        max_length=50,
        no_repeat_ngram_size=2,
        repetition_penalty=1.5,
        num_return_sequences=1,
    )[0]['generated_text']

    # The character class contains no whitespace, so the match stops at the
    # end of the first word; anything after it in the output is discarded.
    first_match = re.search(r'[\u0600-\u06FF]+', result)
    return first_match.group(0) if first_match else result
# Gradio UI: a three-line input textbox wired to correct_text, with a plain
# textbox for the result.
input_box = gr.Textbox(lines=3, placeholder="أدخل النص العربي هنا...")
output_box = gr.Textbox()

interface = gr.Interface(
    fn=correct_text,
    inputs=input_box,
    outputs=output_box,
    title="تصحيح النص العربي",
    description="أداة لتصحيح النصوص العربية باستخدام نموذج SuperSl6/Arabic-Text-Correction.",
    live=True,
)

# Start the web app.
interface.launch()
|