File size: 1,072 Bytes
227b2b4
d1b79e0
 
227b2b4
 
a827e42
 
 
 
 
d1b79e0
 
a827e42
 
 
 
 
 
 
d1b79e0
 
 
 
 
 
 
227b2b4
d1b79e0
 
 
 
a827e42
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from transformers import pipeline, AutoTokenizer
import gradio as gr

# One checkpoint id shared by the tokenizer and the pipeline.
_MODEL_ID = "SuperSl6/Arabic-Text-Correction"

# The tokenizer is created explicitly so that use_fast=False can be passed,
# then handed to the text2text-generation pipeline.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, use_fast=False)
model = pipeline("text2text-generation", model=_MODEL_ID, tokenizer=tokenizer)

def correct_text(input_text: str) -> str:
    """Return the model's corrected version of *input_text*.

    Blank input (``None``, empty, or whitespace-only) produces an empty
    string without invoking the model.

    Args:
        input_text: The text to correct.

    Returns:
        The ``generated_text`` field of the first result returned by the
        module-level ``model`` pipeline, or ``""`` for blank input.
    """
    # The interface is configured with live=True, so this callback can be
    # invoked while the textbox is still empty; there is nothing to correct
    # in that case and no reason to run generation.
    if not input_text or not input_text.strip():
        return ""

    outputs = model(
        input_text,
        max_length=50,               # Limit output length
        no_repeat_ngram_size=2,      # Prevent repeating bigrams
        repetition_penalty=1.5,      # Penalize repetitions
        num_return_sequences=1,      # Return a single output
    )
    # Exactly one sequence is requested, so take the first result.
    return outputs[0]['generated_text']

# Gradio UI: a three-line input textbox wired to correct_text, and a plain
# textbox for the result.
_input_box = gr.Textbox(lines=3, placeholder="أدخل النص العربي هنا...")
_output_box = gr.Textbox()

interface = gr.Interface(
    fn=correct_text,
    inputs=_input_box,
    outputs=_output_box,
    title="تصحيح النص العربي",
    description="أداة لتصحيح النصوص العربية باستخدام نموذج SuperSl6/Arabic-Text-Correction.",
    live=True,  # live mode: per Gradio docs, fn re-runs as the input changes
)

# Start serving the app.
interface.launch()