Spaces:
Sleeping
Sleeping
File size: 7,269 Bytes
519b419 74e2066 519b419 74e2066 519b419 74e2066 519b419 74e2066 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
# -*- coding:utf-8 -*-
import io
import pandas
import streamlit as st
from pycaprio import Pycaprio, mappings
from zipfile import ZipFile
from requests.exceptions import JSONDecodeError
from n4a_analytics_lib.analytics import (GlobalStatistics,
IaaStatistics)
from n4a_analytics_lib.constants import KAPPA_LEGEND
@st.cache
def convert_df(df_ex: pandas.DataFrame) -> bytes:
    """Serialise a dataframe to CSV text and return it as UTF-8 bytes.

    The function is wrapped with ``st.cache``.

    Args:
        df_ex: dataframe to export.

    Returns:
        The CSV rendering of ``df_ex``, encoded as UTF-8.
    """
    csv_text = df_ex.to_csv(encoding="utf-8")
    return csv_text.encode('utf-8')
def check_login(username: str, password: str) -> bool:
    """Tell whether both credential fields have been filled in.

    Only emptiness is checked here; nothing is verified against a server.

    Args:
        username: login name typed by the user.
        password: password typed by the user.

    Returns:
        ``True`` when neither value is empty, ``False`` otherwise.
    """
    return len(username) != 0 and len(password) != 0
def display_data(col: st.columns) -> None:
    """Render the statistics stored under ``st.session_state['gs_obj']``.

    Writes, in order, into ``col``: a metric with the total number of
    curated annotations, the ``df_i`` dataframe, a select box listing the
    object's ``documents`` and the plot created for the selected entry.

    Args:
        col: Streamlit container that receives the widgets.
    """
    stats = st.session_state['gs_obj']

    annotation_total = f"{stats.total_annotations_project} Named entities"
    col.metric("Total curated annotations", annotation_total)

    col.dataframe(stats.df_i)

    chosen = col.selectbox(
        'Select specific data to display bar plot:',
        stats.documents,
        key="selector_data",
    )
    figure = stats.create_plot(chosen)
    col.pyplot(figure)
def template_agreement_dataframe(title: str,
                                 df: pandas.DataFrame,
                                 total_pov: int,
                                 total_annotations: int,
                                 percentage_pov: float,
                                 mode: str) -> None:
    """Render one agreement section: heading, ratio, CSV download and table.

    Args:
        title: text of the section sub-header.
        df: dataframe shown on the page and offered as a CSV download.
        total_pov: number of annotations counted for this section.
        total_annotations: overall number of annotations.
        percentage_pov: share of ``total_pov`` shown next to the ratio.
        mode: suffix used in the download file name and in the widget key.
    """
    st.subheader(title)

    ratio_line = f"{total_pov} / {total_annotations} annotations ({percentage_pov} %)"
    st.markdown(ratio_line)

    csv_payload = convert_df(df)
    st.download_button(
        label="Press to Download CSV",
        data=csv_payload,
        file_name=f"csv_annotators_{mode}.csv",
        mime="text/csv",
        key=f'download-csv_{mode}',
    )

    st.dataframe(df)
def init_session_iaa(data: st.file_uploader,
                     baseline: st.file_uploader,
                     col: st.columns) -> None:
    """Render the inter-annotator agreement (IAA) report for an uploaded project.

    An ``IaaStatistics`` object is built from the uploaded archive and the
    baseline text, then the page receives, in order: the baseline text
    counts, the Fleiss kappa and pairwise Cohen kappa scores next to the
    kappa legend, an optional confusion matrix, the agree / disagree tables
    (each with a CSV download) and the agreement pie plots.

    Args:
        data: uploaded project archive, forwarded as ``zip_project``.
        baseline: uploaded baseline text; ``getvalue()`` and ``name`` are read.
        col: Streamlit container that receives the baseline summary.
    """
    project_analyzed = IaaStatistics(zip_project=data, baseline_text=baseline.getvalue())
    baseline_analyzer = project_analyzed.analyze_text()

    col.markdown(
        f"\n### BASELINE TEXT: {baseline.name}\n"
        f"- sentences: {baseline_analyzer[0]}\n"
        f"- words: {baseline_analyzer[1]}\n"
        f"- characters: {baseline_analyzer[2]}\n"
    )

    # NOTE(review): the leading "π" looks like a mis-decoded emoji; restore
    # the intended glyph once it is known.
    st.markdown("## π IAA metrics")
    col1_kappa, col2_kappa = st.columns(2)

    # Kappa for the whole group of annotators
    col1_kappa.subheader("Fleiss Kappa (global score for group):")
    col1_kappa.markdown(interpret_kappa(project_analyzed.fleiss_kappa), unsafe_allow_html=True)

    # Kappa for every pair of annotators
    col1_kappa.subheader("Cohen Kappa (score for annotators pair):")
    for coders, c_k in project_analyzed.compute_pairs_cohen_kappa().items():
        col1_kappa.markdown(f"* {coders[0]} <> {coders[1]} : {interpret_kappa(c_k)}",
                            unsafe_allow_html=True)

    # Kappa legend
    col2_kappa.markdown(KAPPA_LEGEND, unsafe_allow_html=True)

    # Confusion matrix (optional)
    if st.checkbox('Display confusion matrix'):
        # The default value of a slider must lie inside [min_value, max_value];
        # the width slider previously had a default of 14 with a maximum of 10,
        # which Streamlit rejects, so the maximum is raised to cover it.
        width = st.slider("matrix width", min_value=1, max_value=20, value=14)
        height = st.slider("matrix height", min_value=1, max_value=10, value=4)
        st.pyplot(project_analyzed.plot_confusion_matrix(width=width, height=height).figure)

    # Agree CSV
    template_agreement_dataframe(title="✅️ Agree annotations",
                                 df=project_analyzed.df_agree,
                                 total_pov=project_analyzed.total_agree,
                                 total_annotations=project_analyzed.total_annotations,
                                 percentage_pov=project_analyzed.percent_agree,
                                 mode="agree")

    # Disagree CSV
    template_agreement_dataframe(title="❌ Disagree annotations",
                                 df=project_analyzed.df_disagree,
                                 total_pov=project_analyzed.total_disagree,
                                 total_annotations=project_analyzed.total_annotations,
                                 percentage_pov=project_analyzed.percent_disagree,
                                 mode="disagree")

    # Pie plot
    st.subheader("🏷️ Global Labels Statistics")
    st.pyplot(project_analyzed.plot_agreement_pies().figure)
def _reset_session_state() -> None:
    """Delete every entry of Streamlit's session state, in place."""
    # Assigning ``st.session_state = {}`` would rebind the attribute on the
    # ``streamlit`` module to a plain dict instead of emptying the per-session
    # store, so the entries are removed one by one.
    for key in list(st.session_state.keys()):
        del st.session_state[key]


def init_session_statistics(remote: bool, local: bool, data: tuple) -> None:
    """Reset the session and load the global statistics object into it.

    The session state is emptied, the ``gs_local`` / ``gs_remote`` flags are
    stored, and ``gs_obj`` is then filled either from the remote host or from
    a local archive. Nothing is loaded when both flags are equal.

    Args:
        remote: ``True`` to fetch the curated documents from the remote host.
        local: ``True`` to read the statistics from ``data`` directly.
        data: in remote mode, a sequence whose first two items are the
            username and the password; in local mode, the value forwarded to
            ``GlobalStatistics`` as ``zip_project``.
    """
    # clear session
    _reset_session_state()

    # remember which mode is active
    st.session_state["gs_local"] = local
    st.session_state["gs_remote"] = remote

    # remote mode: the documents are fetched from the API host first
    if remote and not local:
        st.success('Fetch curated documents from host INCEpTION API in progress...')
        try:
            fetch_curated_data_from_remote(
                username=data[0],
                password=data[1]
            )
        except JSONDecodeError:
            # username / password incorrect
            st.error('Username or Password is incorrect please retry.')
            _reset_session_state()

    # local mode: the statistics are built straight from the given data
    if local and not remote:
        st.session_state["gs_obj"] = GlobalStatistics(zip_project=data, remote=False)
def fetch_curated_data_from_remote(username: str,
                                   password: str,
                                   endpoint: str = "https://inception.dhlab.epfl.ch/prod",
                                   project_title: str = "ner4archives-template") -> None:
    """Download the curated documents of a project and store their statistics.

    Every document of ``project_title`` whose state is ``CURATION_COMPLETE``
    is exported in the UIMA CAS XMI (XML 1.1) format. The exported ZIP
    archives are merged into the first one, each member name being kept only
    once, and the merged archive is handed to ``GlobalStatistics``; the
    result is stored in ``st.session_state["gs_obj"]``.

    Args:
        username: account name used to authenticate against ``endpoint``.
        password: password used to authenticate against ``endpoint``.
        endpoint: base URL of the host.
        project_title: name of the project to read.

    Raises:
        IndexError: when no project is named ``project_title`` or when the
            project holds no curated document. The type matches what the
            empty-list lookups raised before, now with an explanation.
    """
    # open a client
    client = Pycaprio(inception_host=endpoint, authentication=(str(username), str(password)))

    # get project object
    project = next((p for p in client.api.projects() if p.project_name == project_title), None)
    if project is None:
        raise IndexError(f"no project named {project_title!r} found on {endpoint}")

    # retrieve the export of every curated document, once each
    curated_exports = [
        client.api.curation(project.project_id, document,
                            curation_format=mappings.InceptionFormat.UIMA_CAS_XMI_XML_1_1)
        for document in client.api.documents(project.project_id)
        if document.document_state == mappings.DocumentState.CURATION_COMPLETE
    ]
    if not curated_exports:
        raise IndexError(f"project {project_title!r} has no curated document")

    first_export, *other_exports = curated_exports

    # Merge all zip in one: the first archive is opened in append mode and
    # receives the members of the others that it does not contain yet.
    with ZipFile(io.BytesIO(first_export), mode="a") as merged:
        known_names = set(merged.namelist())
        for export in other_exports:
            with ZipFile(io.BytesIO(export), mode="r") as archive:
                for member in archive.namelist():
                    if member not in known_names:
                        merged.writestr(member, archive.read(member))
                        known_names.add(member)

        # Create a new object while the merged archive is still open, so that
        # its members are readable.
        # NOTE(review): confirm GlobalStatistics does not read the archive
        # lazily after this function returns (it is closed at that point).
        st.session_state["gs_obj"] = GlobalStatistics(zip_project=merged, remote=True)
def interpret_kappa(score: float) -> str:
    """Format a kappa score as a coloured HTML percentage.

    The colour bands are contiguous, so every real score gets a colour:
    below 0, up to 0.20, up to 0.40, up to 0.60, up to 0.80 and above 0.80.
    Earlier, values falling between the bands (for instance 0, 0.205 or a
    perfect 1.0) were rendered without any colour.

    Args:
        score: kappa value, expected as a fraction (1.0 is full agreement).

    Returns:
        A ``<span>`` element showing ``score`` as a percentage rounded to two
        decimals. The colour is left empty only when ``score`` is NaN.
    """
    if score < 0:
        color = "#e74c3c;"
    elif score <= 0.20:
        color = "#f39c12;"
    elif score <= 0.40:
        color = "#f4d03f;"
    elif score <= 0.60:
        color = "#5dade2;"
    elif score <= 0.80:
        color = "#58d68d;"
    elif score > 0.80:
        color = "#28b463;"
    else:
        # Only NaN fails every comparison above.
        color = ""
    return f"<span style='font-size:30px; color: {color}'>{round(score*100, 2)} %</span>"
|