wiraindrak committed on
Commit
4816a7c
·
1 Parent(s): 0b6de70

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, pipeline
2
+ import nltk.data
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ from googletrans import Translator
6
+
7
+ nltk.download('punkt')
8
+
9
+
10
+ import gradio as gr
11
+ from gradio.mix import Parallel
12
+
13
# --- Summarization: tokenizer and seq2seq model loaded from the same checkpoint.
tokenizer_t5 = T5Tokenizer.from_pretrained(
    "panggi/t5-base-indonesian-summarization-cased"
)
model_t5 = T5ForConditionalGeneration.from_pretrained(
    "panggi/t5-base-indonesian-summarization-cased"
)

# --- Checkpoint names for the sentiment and NER pipelines built below.
pretrained_sentiment = "ProsusAI/finbert"
pretrained_ner = "cahya/bert-base-indonesian-NER"

# Punkt sentence splitter (the English pickle); used by sentiment_df().
sentence_tokenizer = nltk.data.load("tokenizers/punkt/english.pickle")

# return_all_scores=True makes the pipeline emit a score for every label;
# sentiment_analysis() relies on that by iterating over output[0].
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model=pretrained_sentiment,
    tokenizer=pretrained_sentiment,
    return_all_scores=True,
)

# grouped_entities=True yields results keyed by "entity_group", which ner()
# copies into an "entity" key.
ner_pipeline = pipeline(
    task="ner",
    model=pretrained_ner,
    tokenizer=pretrained_ner,
    grouped_entities=True,
)

# Shared googletrans client; run() uses it to translate Indonesian -> English.
translator = Translator()
36
+
37
def summ_t5(text):
    """Summarize ``text`` with the module-level T5 tokenizer and model.

    The text is encoded to PyTorch tensors, passed to ``model_t5.generate``
    with the fixed generation settings below, and the first generated
    sequence is decoded with special tokens stripped.

    Args:
        text: The article text to summarize.

    Returns:
        The decoded summary string.
    """
    generation_options = {
        "max_length": 100,
        "num_beams": 2,
        "repetition_penalty": 2.5,
        "length_penalty": 1.0,
        "early_stopping": True,
        "no_repeat_ngram_size": 2,
        "use_cache": True,
    }
    encoded = tokenizer_t5.encode(text, return_tensors="pt")
    generated = model_t5.generate(encoded, **generation_options)
    # Only the first (and, for a single input, only) sequence is decoded.
    return tokenizer_t5.decode(generated[0], skip_special_tokens=True)
49
+
50
def sentiment_analysis(text):
    """Score ``text`` with the sentiment pipeline.

    Args:
        text: The text handed to ``sentiment_pipeline``.

    Returns:
        A dict mapping each label in the pipeline's first result to its score.
    """
    # The pipeline output is indexed at [0] to get the per-label entries
    # for this single input.
    per_label_entries = sentiment_pipeline(text)[0]
    return {entry["label"]: entry["score"] for entry in per_label_entries}
53
+
54
def ner(text):
    """Run the NER pipeline on ``text`` and package the result.

    Each entity returned by ``ner_pipeline`` gets an ``"entity"`` key holding
    the same value as its ``"entity_group"`` key (the entities are modified in
    place).

    Args:
        text: The text to tag.

    Returns:
        A dict with the original ``"text"`` and the list of ``"entities"``.
    """
    entities = ner_pipeline(text)
    for entity in entities:
        # Mirror the grouped label under the "entity" key as well.
        entity["entity"] = entity["entity_group"]
    return {"text": text, "entities": entities}
59
+
60
def sentiment_df(text):
    """Build a per-sentence sentiment table for ``text``.

    The text is split with ``sentence_tokenizer``; each sentence is scored by
    ``sentiment_analysis`` and its highest-scoring label is kept.

    Args:
        text: The text to split and score.

    Returns:
        A DataFrame with columns ``Text`` (the sentence), ``Label`` (the
        top-scoring label) and ``Score`` (that label's score, rounded to
        three decimals).
    """
    sentences = sentence_tokenizer.tokenize(text)
    predictions = [sentiment_analysis(sentence) for sentence in sentences]

    top_labels = []
    top_scores = []
    for prediction in predictions:
        # max() over the keys returns the first label with the highest score.
        best_label = max(prediction, key=prediction.get)
        top_labels.append(best_label)
        top_scores.append(round(prediction[best_label], 3))

    table = pd.DataFrame(columns=["Text", "Label", "Score"])
    table["Text"] = sentences
    table["Label"] = top_labels
    table["Score"] = top_scores
    return table
74
+
75
def run(text):
    """Run the full analysis for one input article.

    The article is summarized, translated from Indonesian (``id``) to English
    (``en``) for the sentiment pipeline, and the summary is tagged by the NER
    pipeline.

    Args:
        text: The article text entered by the user.

    Returns:
        A ``(summary, sentiment, ner)`` tuple: the summary string, the
        ``{label: score}`` dict from ``sentiment_analysis``, and the dict
        returned by ``ner`` for the summary.
    """
    summ_ = summ_t5(text)
    # translate() returns a result object, not a string; the translated
    # string lives on its ``.text`` attribute, which is what the sentiment
    # pipeline needs as input.
    translation = translator.translate(text, src='id', dest='en')
    sent_ = sentiment_analysis(translation.text)
    ner_ = ner(summ_)
    return summ_, sent_, ner_
81
+
82
if __name__ == "__main__":
    # Build the two-column UI: input and trigger on the left, results on the
    # right.
    with gr.Blocks() as demo:
        gr.Markdown("""<h1 style="text-align:center">Stock Analysis - Indonesia</h1>""")

        gr.Markdown(
            """
            Creator: Wira Indra Kusuma
            """
        )
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input Text")
                # The caption is the button's value (first positional
                # argument); passing it as ``label`` does not set the caption.
                analyze_button = gr.Button("Analyze")

            # Each output component is created exactly once. The original
            # created the set twice and rebound the names, leaving the first
            # set rendered but never connected to the click handler.
            with gr.Column():
                summ_output = gr.Textbox(label="Article Summary")
                ner_output = gr.HighlightedText(label="NER Summary")
                sent_output = gr.Label(label="Sentiment Summary")

        # Output order must match the tuple returned by run():
        # (summary, sentiment, ner).
        analyze_button.click(
            run,
            inputs=input_text,
            outputs=[summ_output, sent_output, ner_output],
        )
    demo.launch()