Spaces:
Runtime error
Runtime error
Commit
·
3f9aebf
1
Parent(s):
671160e
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,10 @@
|
|
1 |
-
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, pipeline
|
2 |
import nltk.data
|
3 |
import pandas as pd
|
4 |
import matplotlib.pyplot as plt
|
5 |
-
from google_trans_new import google_translator
|
6 |
|
7 |
nltk.download('punkt')
|
8 |
|
9 |
-
|
10 |
import gradio as gr
|
11 |
from gradio.mix import Parallel
|
12 |
|
@@ -18,6 +16,12 @@ pretrained_ner = "cahya/bert-base-indonesian-NER"
|
|
18 |
|
19 |
sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
sentiment_pipeline = pipeline(
|
22 |
"sentiment-analysis",
|
23 |
model=pretrained_sentiment,
|
@@ -32,14 +36,19 @@ ner_pipeline = pipeline(
|
|
32 |
grouped_entities=True
|
33 |
)
|
34 |
|
35 |
-
translator = google_translator()
|
36 |
-
|
37 |
examples = [
|
38 |
"Perusahaan industri e-commerce Indonesia, Bukalapak telah memberhentikan puluhan karyawan dari beberapa function; Berlawanan dengan PHK sebelumnya, perusahaan mengontrak jajaran pekerja kantornya, harian Kompas melaporkan.",
|
39 |
"Dengan pabrik produksi baru, perusahaan akan meningkatkan kapasitasnya untuk memenuhi peningkatan permintaan yang diharapkan dan akan meningkatkan penggunaan bahan baku dan oleh karena itu meningkatkan profitabilitas produksi.",
|
40 |
"Lifetree didirikan pada tahun 2000, dan pendapatannya meningkat rata-rata 40% dengan margin di akhir 30-an."
|
41 |
]
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
def summ_t5(text):
|
44 |
input_ids = tokenizer_t5.encode(text, return_tensors='pt')
|
45 |
summary_ids = model_t5.generate(input_ids,
|
@@ -66,7 +75,7 @@ def ner(text):
|
|
66 |
def sentiment_df(text):
|
67 |
df = pd.DataFrame(columns=['Text', 'Eng', 'Label', 'Score'])
|
68 |
text_list = sentence_tokenizer.tokenize(text)
|
69 |
-
eng_text = [
|
70 |
result = [sentiment_analysis(text) for text in text_list]
|
71 |
labels = []
|
72 |
scores = []
|
@@ -82,7 +91,7 @@ def sentiment_df(text):
|
|
82 |
|
83 |
def run(text):
|
84 |
summ_ = summ_t5(text)
|
85 |
-
summ_translated =
|
86 |
sent_ = sentiment_analysis(summ_translated )
|
87 |
ner_ = ner(summ_)
|
88 |
df_sentiment = sentiment_df(text)
|
|
|
1 |
+
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, MarianMTModel, MarianTokenizer, pipeline
|
2 |
import nltk.data
|
3 |
import pandas as pd
|
4 |
import matplotlib.pyplot as plt
|
|
|
5 |
|
6 |
nltk.download('punkt')
|
7 |
|
|
|
8 |
import gradio as gr
|
9 |
from gradio.mix import Parallel
|
10 |
|
|
|
16 |
|
17 |
sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
|
18 |
|
19 |
+
# Single checkpoint name shared by the translation tokenizer and model, so the
# two cannot drift apart.
_translate_checkpoint = "Helsinki-NLP/opus-mt-id-en"
tokenizer_translate = MarianTokenizer.from_pretrained(_translate_checkpoint)
model_translate = MarianMTModel.from_pretrained(_translate_checkpoint)
# Alternative checkpoint, currently disabled:
#finetuned_model = MarianMTModel.from_pretrained(
#    "wolfrage89/annual_report_translation_id_en")
|
24 |
+
|
25 |
sentiment_pipeline = pipeline(
|
26 |
"sentiment-analysis",
|
27 |
model=pretrained_sentiment,
|
|
|
36 |
grouped_entities=True
|
37 |
)
|
38 |
|
|
|
|
|
39 |
examples = [
|
40 |
"Perusahaan industri e-commerce Indonesia, Bukalapak telah memberhentikan puluhan karyawan dari beberapa function; Berlawanan dengan PHK sebelumnya, perusahaan mengontrak jajaran pekerja kantornya, harian Kompas melaporkan.",
|
41 |
"Dengan pabrik produksi baru, perusahaan akan meningkatkan kapasitasnya untuk memenuhi peningkatan permintaan yang diharapkan dan akan meningkatkan penggunaan bahan baku dan oleh karena itu meningkatkan profitabilitas produksi.",
|
42 |
"Lifetree didirikan pada tahun 2000, dan pendapatannya meningkat rata-rata 40% dengan margin di akhir 30-an."
|
43 |
]
|
44 |
|
45 |
+
def get_translation(text):
    """Translate one string with the module-level Marian tokenizer and model.

    The text is tokenized as a single-item batch (truncated to at most 104
    tokens), passed through ``model_translate.generate``, and the first
    generated sequence is decoded back to text with special tokens removed.

    Args:
        text: The source string to translate.

    Returns:
        The decoded translation of ``text``.
    """
    # Batch of one; input beyond 104 tokens is cut off by the tokenizer.
    encoded = tokenizer_translate(
        [text],
        return_tensors='pt',
        max_length=104,
        truncation=True,
    )
    generated = model_translate.generate(**encoded)
    # Only one input was supplied, so only the first output sequence is used.
    first_sequence = generated[0]
    return tokenizer_translate.decode(first_sequence, skip_special_tokens=True)
|
51 |
+
|
52 |
def summ_t5(text):
|
53 |
input_ids = tokenizer_t5.encode(text, return_tensors='pt')
|
54 |
summary_ids = model_t5.generate(input_ids,
|
|
|
75 |
def sentiment_df(text):
|
76 |
df = pd.DataFrame(columns=['Text', 'Eng', 'Label', 'Score'])
|
77 |
text_list = sentence_tokenizer.tokenize(text)
|
78 |
+
eng_text = [get_translation(text) for text in text_list]
|
79 |
result = [sentiment_analysis(text) for text in text_list]
|
80 |
labels = []
|
81 |
scores = []
|
|
|
91 |
|
92 |
def run(text):
|
93 |
summ_ = summ_t5(text)
|
94 |
+
summ_translated = get_translation(summ_)
|
95 |
sent_ = sentiment_analysis(summ_translated )
|
96 |
ner_ = ner(summ_)
|
97 |
df_sentiment = sentiment_df(text)
|