File size: 1,136 Bytes
227b2b4
d1b79e0
102175e
d1b79e0
227b2b4
 
a827e42
 
 
 
 
d1b79e0
 
a827e42
 
102175e
 
 
 
a827e42
102175e
 
 
 
 
 
d1b79e0
 
 
 
 
 
227b2b4
d1b79e0
 
 
 
a827e42
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from transformers import pipeline, AutoTokenizer
import gradio as gr
import re

# Single model identifier used for both the tokenizer and the pipeline.
_MODEL_ID = "SuperSl6/Arabic-Text-Correction"

# Request the non-fast tokenizer implementation explicitly (use_fast=False).
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, use_fast=False)

# Text-to-text generation pipeline that reuses the tokenizer loaded above
# rather than letting the pipeline pick its own.
model = pipeline(
    task="text2text-generation",
    model=_MODEL_ID,
    tokenizer=tokenizer,
)

# One or more consecutive characters from the Unicode "Arabic" block
# (U+0600-U+06FF). Whitespace lies outside that range, so a match always
# stops at the first space or newline.
_ARABIC_RUN = re.compile(r'[\u0600-\u06FF]+')


def correct_text(input_text: str) -> str:
    """Return the model's corrected form of *input_text*.

    The text is passed to the module-level ``model`` pipeline and the first
    run of Arabic-block characters found in the generated text is returned.
    If the generated text contains no such characters, it is returned
    unchanged.

    Args:
        input_text: Text typed by the user. ``None``, empty, or
            whitespace-only input is treated as "nothing to correct".

    Returns:
        The first Arabic character run of the generated text, the raw
        generated text when it holds no Arabic characters, or ``""`` for
        blank input.
    """
    # The UI is configured with live=True, so this callback also runs while
    # the textbox is empty; skip the model call instead of generating from
    # an empty prompt.
    if not input_text or not input_text.strip():
        return ""

    generated = model(
        input_text,
        max_length=50,  # upper bound on the generated sequence length
        no_repeat_ngram_size=2,
        repetition_penalty=1.5,
        num_return_sequences=1,
    )[0]['generated_text']

    # NOTE(review): only the first whitespace-delimited Arabic run is kept,
    # so any further words in the generated text are dropped -- confirm this
    # is the intended behaviour for multi-word input.
    match = _ARABIC_RUN.search(generated)
    return match.group(0) if match else generated

# Gradio UI: a three-line text input feeding correct_text, and a plain text
# output for the result.
_input_box = gr.Textbox(lines=3, placeholder="أدخل النص العربي هنا...")
_output_box = gr.Textbox()

interface = gr.Interface(
    title="تصحيح النص العربي",
    description="أداة لتصحيح النصوص العربية باستخدام نموذج SuperSl6/Arabic-Text-Correction.",
    fn=correct_text,
    inputs=_input_box,
    outputs=_output_box,
    # live=True: re-run correct_text as the input changes, without a
    # submit click.
    live=True,
)

# Start serving the app.
interface.launch()