Spaces:
Runtime error
Runtime error
Commit
·
6d26f77
1
Parent(s):
c6c7469
Upload 2 files
Browse files- app.py +111 -2
- requirements.txt +4 -0
app.py
CHANGED
@@ -1,3 +1,112 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import requests
|
4 |
+
import spacy_udpipe
|
5 |
+
import streamlit as st
|
6 |
+
from spacy import displacy
|
7 |
|
8 |
+
# model = span_marker.SpanMarkerModel.from_pretrained("iahlt/iahlt-span-marker-alephbert-small-nemo-mt-he")

# Module-level pipeline setup: download the Hebrew ("he") UDPipe model and
# load it as the spaCy pipeline used by the script below.
spacy_udpipe.download("he")
nlp = spacy_udpipe.load("he")

# Add the "span_marker" component to the pipeline, configured with the
# model named in `config`.
nlp.add_pipe(
    "span_marker",
    config={"model": "iahlt/span-marker-alephbert-small-nemo-mt-he"},
)
|
13 |
+
|
14 |
+
|
15 |
+
def get_html(html: str) -> str:
    """Wrap markup in a bordered, horizontally scrollable container.

    Args:
        html: Markup to embed (the script passes displaCy output here).

    Returns:
        A single string consisting of a ``<style>`` rule for ``mark.entity``
        followed by a ``<div>`` wrapper containing ``html`` with every
        newline replaced by a space.
    """
    WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    style = "<style>mark.entity { display: inline-block }</style>"
    html = WRAPPER.format(html)
    return f"{style}{html}"
|
23 |
+
|
24 |
+
|
25 |
+
def page_init():
    """Render the page header for the demo."""
    st.header("Named Entity Recognition Demo")
|
27 |
+
|
28 |
+
|
29 |
+
# NOTE(review): st.cache is deprecated in newer Streamlit releases in favour
# of st.cache_data — confirm against the installed Streamlit version.
@st.cache
def get_html_from_server(text):
    """Fetch entities for ``text`` from the IAHLT NER API and render them.

    Args:
        text: Raw input text; surrounding whitespace is stripped before
            the request is made.

    Returns:
        The markup produced by ``displacy.render`` in manual ``"ent"`` mode.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    base_url = "https://ne-api.iahlt.org/api/hebrew/ner/"

    def get_entities(text):
        """Return the API answer with its "ents" list filtered and reshaped."""
        text = text.strip()
        if text == "":
            return []
        # Pass the text through `params` so requests percent-encodes it;
        # formatting it straight into the URL breaks on '&', '#', '+', etc.
        response = requests.get(base_url, params={"text": text}, timeout=30)
        response.raise_for_status()
        answer = response.json()
        # Drop "O" groups and keep only the start/end/label keys.
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        return answer

    def render_entities(text):
        """Render the fetched entities with displaCy in manual mode."""
        entities = get_entities(text)
        return displacy.render(entities, style="ent", manual=True)

    return render_entities(text)
|
57 |
+
|
58 |
+
|
59 |
+
if __name__ == '__main__':
    # Script entry point: draw the page, read the text, and on "Annotate"
    # show the local pipeline's entities next to the remote API's entities.
    page_init()

    displacy_options = {}

    sample_text = "יו\"ר ועדת הנוער נתן סלובטיק אמר שהשחקנים של אנחנו לא משתלבים באירופה."

    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")
    # The CSS family name is 'David Libre'; the '+' form is only valid in
    # the stylesheet URL. Markup is kept flush-left inside the strings.
    style = """
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=David+Libre">
<style>
textarea {
font-size: 16px;
font-family: 'David Libre';
direction: rtl;
}
.entities {
font-size: 16px;
font-family: 'David Libre';
direction: rtl;
}
</style>
"""
    st.write(style, unsafe_allow_html=True)

    if text and btn:
        # Entities from the local spaCy pipeline (`nlp`).
        doc = nlp(text)
        html = displacy.render(
            doc,
            style="ent",
            options=displacy_options,
            manual=False,
        )

        nemo_html = get_html(html)
        # NOTE(review): unlike nemo_html, this markup is not passed through
        # get_html, so its newlines are kept — confirm that is intended.
        iahlt_html = get_html_from_server(text)

        html = f"""
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div style="width: 50%">
<h3>Nemo model results</h3>
{nemo_html}
</div>
<div style="width: 50%">
<h3>IAHLT results</h3>
{iahlt_html}
</div>
</div>
"""
        st.write(html, unsafe_allow_html=True)

    else:
        st.write("")
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
spacy
|
3 |
+
spacy-udpipe
|
4 |
+
span-marker
|