added ner
vickeee465 committed · commit 7a079bf · parent 2ab6001
Files changed:
- app.py +5 -2
- interfaces/cap.py +13 -13
- interfaces/emotion.py +1 -1
- interfaces/manifesto.py +5 -5
- interfaces/ner.py +42 -0
- interfaces/sentiment.py +1 -1
- requirements.txt +3 -1
app.py CHANGED
@@ -4,6 +4,8 @@ from interfaces.cap import demo as cap_demo
 from interfaces.manifesto import demo as manifesto_demo
 from interfaces.sentiment import demo as sentiment_demo
 from interfaces.emotion import demo as emotion_demo
+from interfaces.ner import demo as ner_demo
+from interfaces.ner import download_models as download_spacy_models
 
 with gr.Blocks() as demo:
     gr.Markdown(
@@ -16,11 +18,12 @@ with gr.Blocks() as demo:
     """)
 
     gr.TabbedInterface(
-        interface_list=[cap_demo, manifesto_demo, sentiment_demo, emotion_demo],
-        tab_names=["CAP", "Manifesto", "Sentiment (3)", "Emotions (8)"],
+        interface_list=[cap_demo, manifesto_demo, sentiment_demo, emotion_demo, ner_demo],
+        tab_names=["CAP", "Manifesto", "Sentiment (3)", "Emotions (8)", "Named Entity Recognition"],
     )
 
 if __name__ == "__main__":
+    download_spacy_models()
     demo.launch()
 
 # TODO: add all languages & domains
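The new __main__ block calls download_spacy_models() before every launch, so all three spaCy pipelines are fetched again on each Space restart. A minimal sketch of a guarded variant, assuming the model names used in interfaces/ner.py and spaCy's spacy.util.is_package helper (this sketch is not part of the commit):

# Sketch only (not in the commit): skip pipelines that are already installed,
# so repeated restarts of the Space do not re-download them.
import huspacy
from spacy.cli import download as spacy_download
from spacy.util import is_package

def download_spacy_models(models=("en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg")):
    for model in models:
        if is_package(model):          # already installed as a package
            continue
        if model.startswith("hu"):
            huspacy.download(model)    # Hungarian pipelines are distributed via huspacy
        else:
            spacy_download(model)      # the rest come from spaCy's model index

if __name__ == "__main__":
    download_spacy_models()
    # demo.launch() follows, as in app.py above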
interfaces/cap.py CHANGED
@@ -14,19 +14,19 @@ from label_dicts import CAP_NUM_DICT, CAP_LABEL_NAMES
 HF_TOKEN = os.environ["hf_read"]
 
 languages = [
-    [13 removed entries; their values are truncated in the source]
+    "Danish",
+    "Dutch",
+    "English",
+    "French",
+    "German",
+    "Hungarian",
+    "Italian",
+    "Polish",
+    "Portuguese",
+    "Spanish",
+    "Czech",
+    "Slovak",
+    "Norwegian"
 ]
 
 domains = {
interfaces/emotion.py CHANGED
@@ -12,7 +12,7 @@ from label_dicts import MANIFESTO_LABEL_NAMES
 HF_TOKEN = os.environ["hf_read"]
 
 languages = [
-    [1 removed entry; its value is truncated in the source]
+    "Czech", "English", "French", "German", "Hungarian", "Italian"
 ]
 
 def build_huggingface_path(language: str):
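These lists drive the language choice in each tab; interfaces/ner.py (added below) shows the pattern, feeding its list into a gr.Dropdown and mapping the selected value to a model identifier. A minimal, self-contained sketch of that wiring, with a purely hypothetical path builder and stub prediction function standing in for the real implementations, which this diff does not show:

import gradio as gr

languages = ["Czech", "English", "French", "German", "Hungarian", "Italian"]

def build_huggingface_path(language: str) -> str:
    # Hypothetical naming scheme, for illustration only; the real builders
    # live in the interface modules and are not part of this diff.
    return f"some-org/emotion-model-{language.lower()}"

def predict(text: str, language: str) -> str:
    # Stub: the real interfaces run a transformers classifier here.
    model_id = build_huggingface_path(language)
    return f"(stub) would classify the input with {model_id}"

demo = gr.Interface(
    fn=predict,
    inputs=[gr.Textbox(lines=6, label="Input"),
            gr.Dropdown(languages, label="Language")],
    outputs=gr.Textbox(label="Output"),
)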
interfaces/manifesto.py CHANGED
@@ -12,11 +12,11 @@ from label_dicts import MANIFESTO_LABEL_NAMES
 HF_TOKEN = os.environ["hf_read"]
 
 languages = [
-    [5 removed entries; their values are truncated in the source]
+    "Armenian", "Bulgarian", "Croatian", "Czech", "Danish", "Dutch", "English",
+    "Estonian", "Finnish", "French", "Georgian", "German", "Greek", "Hebrew",
+    "Hungarian", "Icelandic", "Italian", "Japanese", "Korean", "Latvian",
+    "Lithuanian", "Norwegian", "Polish", "Portuguese", "Romanian", "Russian",
+    "Serbian", "Slovak", "Slovenian", "Spanish", "Swedish", "Turkish"
 ]
 
 def build_huggingface_path(language: str):
interfaces/ner.py ADDED
@@ -0,0 +1,42 @@
+import gradio as gr
+
+import os
+import torch
+import numpy as np
+from transformers import AutoModelForSequenceClassification
+from transformers import AutoTokenizer
+from huggingface_hub import HfApi
+
+languages = [
+    "English", "Hungarian", "Multilingual"
+]
+
+def download_models(models=["en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg"]):
+    for model in models:
+        if model.startswith("hu"):
+            huspacy.download(model)
+        spacy.cli.download(model_name)
+
+def build_spacy_path(language: str):
+    language = language.lower()
+    if language == "english":
+        return "en_core_web_lg"
+    if language == "hungarian":
+        return "hu_core_news_lg"
+    else:
+        return "xx_ent_wiki_sm"
+
+def named_entity_recognition(text, language):
+    model_id = build_spacy_path(language)
+    pipeline = spacy.load(model_id)
+    doc = pipeline(text)
+    entities = [(ent.text, ent.label_) for ent in doc.ents]
+    model_id_hf = f"huspacy/{model_id}" if model_id.startswith("hu") else f"spacy/{model_id}"
+    output_info = f'<p style="text-align: center; display: block">Prediction was made using the <a href="https://huggingface.co/{model_id_hf}">{model_id_hf}</a> model.</p>'
+    return entities, output_info
+
+demo = gr.Interface(
+    fn=named_entity_recognition,
+    inputs=[gr.Textbox(lines=6, label="Input"),
+            gr.Dropdown(languages, label="Language")],
+    outputs=[gr.HighlightedText(label='Output'), gr.Markdown()])
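As committed, interfaces/ner.py will fail at runtime: download_models and named_entity_recognition use spacy and huspacy without importing them, spacy.cli.download receives an undefined model_name (presumably model was intended) and is not separated from the huspacy branch by an else, and several imports (torch, numpy, transformers, huggingface_hub) appear unused here. An editor's sketch of corrected download and loading logic, under the assumption that model_name was meant to be model (this is not the committed code):

import spacy
import huspacy
from spacy.cli import download as spacy_download

def download_models(models=("en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg")):
    for model in models:
        if model.startswith("hu"):
            huspacy.download(model)   # Hungarian pipelines are distributed by huspacy
        else:
            spacy_download(model)     # the others come from spaCy's own releases

_loaded = {}

def load_pipeline(model_id: str):
    # Cache pipelines so spacy.load() runs once per model, not once per request.
    if model_id not in _loaded:
        _loaded[model_id] = spacy.load(model_id)
    return _loaded[model_id]

Returning entities as a list of (text, label) tuples is accepted by gr.HighlightedText, but it renders only the extracted spans; passing a dict of the form {"text": ..., "entities": [...]} with character offsets would highlight the entities inside the original sentence.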
interfaces/sentiment.py CHANGED
@@ -12,7 +12,7 @@ from label_dicts import MANIFESTO_LABEL_NAMES
 HF_TOKEN = os.environ["hf_read"]
 
 languages = [
-    [1 removed entry; its value is truncated in the source]
+    "Czech", "English", "French", "German", "Hungarian", "Italian"
 ]
 
 def build_huggingface_path(language: str):
requirements.txt CHANGED
@@ -1,4 +1,6 @@
 pandas
 torch==2.2.1
 transformers==4.39.1
-sentencepiece==0.2.0
+sentencepiece==0.2.0
+spacy
+huspacy