Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,17 +8,19 @@ from openpyxl.styles import Font, Color, PatternFill
|
|
| 8 |
from openpyxl.styles.colors import WHITE
|
| 9 |
import gradio as gr
|
| 10 |
import underthesea
|
|
|
|
| 11 |
|
| 12 |
# Load the model and tokenizer
|
| 13 |
senti_model = RobertaForSequenceClassification.from_pretrained("wonrax/phobert-base-vietnamese-sentiment")
|
| 14 |
senti_tokenizer = AutoTokenizer.from_pretrained("wonrax/phobert-base-vietnamese-sentiment", use_fast=False)
|
| 15 |
|
| 16 |
def segmentation(text):
|
| 17 |
-
|
|
|
|
| 18 |
segmented_sentences = []
|
| 19 |
for sentence in sentences:
|
| 20 |
sentence = sentence.strip()
|
| 21 |
-
if sentence: #
|
| 22 |
segmented_sentence = underthesea.word_tokenize(sentence)
|
| 23 |
segmented_sentences.append(' '.join(segmented_sentence))
|
| 24 |
return segmented_sentences
|
|
@@ -153,6 +155,12 @@ def generate_excel_file(df):
|
|
| 153 |
|
| 154 |
return excel_file_path
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
inputs = [
|
| 157 |
gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
|
| 158 |
gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích")
|
|
|
|
| 8 |
from openpyxl.styles.colors import WHITE
|
| 9 |
import gradio as gr
|
| 10 |
import underthesea
|
| 11 |
+
import re
|
| 12 |
|
| 13 |
# Load the pretrained sentiment classifier and the tokenizer published under
# the same checkpoint name (the slow tokenizer is requested explicitly).
senti_model = RobertaForSequenceClassification.from_pretrained(
    "wonrax/phobert-base-vietnamese-sentiment"
)
senti_tokenizer = AutoTokenizer.from_pretrained(
    "wonrax/phobert-base-vietnamese-sentiment",
    use_fast=False,
)
|
| 16 |
|
| 17 |
def segmentation(text):
    """Split *text* into sentences and word-segment each one.

    The text is cut at every period and newline. Each non-blank piece is
    passed through ``underthesea.word_tokenize`` and the resulting tokens
    are joined back together with single spaces.

    Args:
        text: Raw input text. ``None`` or an empty string is accepted.

    Returns:
        A list of word-segmented sentences (strings) in input order; an
        empty list when there is nothing to segment.
    """
    if not text:
        # Nothing to split; this also keeps re.split from raising
        # TypeError when the caller passes None.
        return []

    # Split text by periods and newlines
    pieces = (piece.strip() for piece in re.split(r'[.\n]', text))
    return [
        ' '.join(underthesea.word_tokenize(piece))
        for piece in pieces
        if piece  # Ignore empty sentences
    ]
|
|
|
|
| 155 |
|
| 156 |
return excel_file_path
|
| 157 |
|
| 158 |
+
def analyze_from_text(text):
    """Analyze text supplied directly, with no document file.

    Delegates to ``analyze_text``, passing *text* as the first argument and
    ``None`` as the second, and returns whatever that call returns.
    """
    docx_file = None
    return analyze_text(text, docx_file)
|
| 160 |
+
|
| 161 |
+
def analyze_from_file(docx_file):
    """Analyze an uploaded document file, with no directly typed text.

    Delegates to ``analyze_text``, passing ``None`` as the first argument
    and *docx_file* as the second, and returns whatever that call returns.
    """
    text = None
    return analyze_text(text, docx_file)
|
| 163 |
+
|
| 164 |
inputs = [
|
| 165 |
gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
|
| 166 |
gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích")
|