Stefan Dumitrescu committed on
Commit
f3f70d0
·
1 Parent(s): ba99ac9
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +41 -14
  3. gitattributes +0 -27
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Turkish Named Entity Recognition
3
  emoji: πŸƒ
4
  colorFrom: indigo
5
  colorTo: indigo
 
1
  ---
2
+ title: Romanian Named Entity Recognition
3
  emoji: πŸƒ
4
  colorFrom: indigo
5
  colorTo: indigo
app.py CHANGED
@@ -62,29 +62,56 @@ Run_Button = st.button("Run", key=None)
62
  if Run_Button == True:
63
 
64
  ner = setModel(named_persons_only = False)
65
- output = ner(input_text)[0]
66
-
67
- df = pd.DataFrame.from_dict(output)
68
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  st.subheader("Recognized Entities")
70
  st.dataframe(df)
71
 
72
- """
73
  st.subheader("Spacy Style Display")
74
  spacy_display = {}
75
  spacy_display["ents"] = []
76
- spacy_display["text"] = input_text
77
  spacy_display["title"] = None
78
 
79
- for entity in output:
80
- if aggregation != "none":
81
- spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
82
- else:
83
- spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity"]})
84
 
85
- entity_list = ["PER", "LOC", "ORG", "MISC"]
86
- colors = {'PER': '#85DCDF', 'LOC': '#DF85DC', 'ORG': '#DCDF85', 'MISC': '#85ABDF',}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True, options={"ents": entity_list, "colors": colors})
88
  style = "<style>mark.entity { display: inline-block }</style>"
89
- st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)
90
- """
 
62
  if Run_Button == True:
63
 
64
  ner = setModel(named_persons_only = False)
65
+ output = ner(input_text)[0] # only one sentence
 
 
66
 
67
+ # tabular form
68
+ data = []
69
+ for word in output["words"]:
70
+ if word["tag"]!="O":
71
+ data.append({
72
+ "word": word["text"],
73
+ "tag": word["tag"],
74
+ "start_char": word["start_char"],
75
+ "end_char": word["end_char"],
76
+ "span_after": word["span_after"],
77
+ })
78
+ df = pd.DataFrame.from_dict(data)
79
  st.subheader("Recognized Entities")
80
  st.dataframe(df)
81
 
82
+
83
  st.subheader("Spacy Style Display")
84
  spacy_display = {}
85
  spacy_display["ents"] = []
86
+ spacy_display["text"] = output["text"]
87
  spacy_display["title"] = None
88
 
89
+ for word in output["words"]:
90
+ #spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
91
+ spacy_display["ents"].append({"start": word["start_char"], "end": word["end_char"], "label": word["tag"]})
 
 
92
 
93
+ entity_list = ['O', 'PERSON', 'ORG', 'GPE', 'LOC', 'NAT_REL_POL',
94
+ 'EVENT', 'LANGUAGE', 'WORK_OF_ART', 'DATETIME',
95
+ 'PERIOD', 'MONEY', 'QUANTITY', 'NUMERIC',
96
+ 'ORDINAL', 'FACILITY']
97
+ colors = {
98
+ 'O': '#FFF',
99
+ 'PERSON': '#F00',
100
+ 'ORG': '#F00',
101
+ 'GPE': '#F00',
102
+ 'LOC': '#F00',
103
+ 'NAT_REL_POL': '#F00',
104
+ 'EVENT': '#F00',
105
+ 'LANGUAGE': '#F00',
106
+ 'WORK_OF_ART': '#F00',
107
+ 'DATETIME': '#F00',
108
+ 'PERIOD': '#F00',
109
+ 'MONEY': '#F00',
110
+ 'QUANTITY': '#F00',
111
+ 'NUMERIC': '#F00',
112
+ 'ORDINAL': '#F00',
113
+ 'FACILITY': '#F00',
114
+ }
115
  html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True, options={"ents": entity_list, "colors": colors})
116
  style = "<style>mark.entity { display: inline-block }</style>"
117
+ st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)
 
gitattributes DELETED
@@ -1,27 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bin.* filter=lfs diff=lfs merge=lfs -text
5
- *.bz2 filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.model filter=lfs diff=lfs merge=lfs -text
12
- *.msgpack filter=lfs diff=lfs merge=lfs -text
13
- *.onnx filter=lfs diff=lfs merge=lfs -text
14
- *.ot filter=lfs diff=lfs merge=lfs -text
15
- *.parquet filter=lfs diff=lfs merge=lfs -text
16
- *.pb filter=lfs diff=lfs merge=lfs -text
17
- *.pt filter=lfs diff=lfs merge=lfs -text
18
- *.pth filter=lfs diff=lfs merge=lfs -text
19
- *.rar filter=lfs diff=lfs merge=lfs -text
20
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
- *.tar.* filter=lfs diff=lfs merge=lfs -text
22
- *.tflite filter=lfs diff=lfs merge=lfs -text
23
- *.tgz filter=lfs diff=lfs merge=lfs -text
24
- *.xz filter=lfs diff=lfs merge=lfs -text
25
- *.zip filter=lfs diff=lfs merge=lfs -text
26
- *.zstandard filter=lfs diff=lfs merge=lfs -text
27
- *tfevents* filter=lfs diff=lfs merge=lfs -text