SuperSl6 committed on
Commit
102175e
·
verified ·
1 Parent(s): a827e42

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from transformers import pipeline, AutoTokenizer
2
  import gradio as gr
 
3
 
4
  # Load tokenizer with use_fast=False
5
  tokenizer = AutoTokenizer.from_pretrained("SuperSl6/Arabic-Text-Correction", use_fast=False)
@@ -12,12 +13,17 @@ model = pipeline(
12
  def correct_text(input_text):
13
  result = model(
14
  input_text,
15
- max_length=50, # Limit output length
16
- no_repeat_ngram_size=2, # Prevent repeating bigrams
17
- repetition_penalty=1.5, # Penalize repetitions
18
- num_return_sequences=1 # Return a single output
19
  )[0]['generated_text']
20
- return result
 
 
 
 
 
21
 
22
  # Gradio Interface
23
  interface = gr.Interface(
 
1
  from transformers import pipeline, AutoTokenizer
2
  import gradio as gr
3
+ import re
4
 
5
  # Load tokenizer with use_fast=False
6
  tokenizer = AutoTokenizer.from_pretrained("SuperSl6/Arabic-Text-Correction", use_fast=False)
 
def correct_text(input_text):
    """Return the first run of Arabic characters generated for *input_text*.

    The text is passed to the module-level ``model`` pipeline and the first
    contiguous run of characters in the Unicode range U+0600-U+06FF is
    extracted from the generated text.

    Args:
        input_text: The text to correct.

    Returns:
        The first Arabic run found in the generated text, the raw generated
        text when it contains no such run, or ``""`` when *input_text* is
        ``None``, empty or only whitespace.
    """
    # Nothing to correct: skip the model call for missing or blank input.
    if input_text is None or (isinstance(input_text, str) and not input_text.strip()):
        return ""

    result = model(
        input_text,
        max_length=50,            # Limit output length
        no_repeat_ngram_size=2,   # Prevent repeating bigrams
        repetition_penalty=1.5,   # Penalize repetitions
        num_return_sequences=1,   # Return a single output
    )[0]['generated_text']

    # search() stops at the first match instead of collecting every match
    # just to keep the first one.
    # NOTE(review): the pattern contains no whitespace, so for a multi-word
    # output only the first word is returned -- confirm this is intended.
    first_arabic_run = re.search(r'[\u0600-\u06FF]+', result)
    return first_arabic_run.group(0) if first_arabic_run else result
27
 
28
  # Gradio Interface
29
  interface = gr.Interface(