Spaces:
Runtime error
Runtime error
Commit
·
916cbfe
1
Parent(s):
0f220f8
Add organise static demo
Browse files
pages/5_π_Organise_Demo.py
CHANGED
@@ -1,9 +1,21 @@
|
|
1 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
import streamlit as st
|
4 |
import streamlit_analytics
|
5 |
from utils import add_logo_to_sidebar, add_footer, add_email_signup_form
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
streamlit_analytics.start_tracking()
|
8 |
|
9 |
st.set_page_config(
|
@@ -22,7 +34,58 @@ add_logo_to_sidebar()
|
|
22 |
st.sidebar.success("π Select a demo above.")
|
23 |
|
24 |
st.title('π Organise Demo')
|
25 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
add_email_signup_form()
|
28 |
|
|
|
1 |
import os
|
2 |
+
import joblib
|
3 |
+
|
4 |
+
import pandas as pd
|
5 |
+
import plotly.express as px
|
6 |
+
|
7 |
+
from huggingface_hub import hf_hub_download, snapshot_download
|
8 |
|
9 |
import streamlit as st
|
10 |
import streamlit_analytics
|
11 |
from utils import add_logo_to_sidebar, add_footer, add_email_signup_form
|
12 |
|
13 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
14 |
+
MODEL_REPO_ID = "simplexico/cuad-sklearn-contract-clustering"
|
15 |
+
DATA_REPO_ID = "simplexico/cuad-top-ten"
|
16 |
+
MODEL_FILENAME = "cuad_tfidf_umap_kmeans.pkl"
|
17 |
+
DATA_FILENAME = "cuad_top_ten_popular_contract_types.json"
|
18 |
+
|
19 |
streamlit_analytics.start_tracking()
|
20 |
|
21 |
st.set_page_config(
|
|
|
34 |
st.sidebar.success("π Select a demo above.")
|
35 |
|
36 |
st.title('π Organise Demo')
|
37 |
+
st.write("""
|
38 |
+
This demo shows how AI can be used to organise contracts.
|
39 |
+
We've trained a model to group contracts into similar types.
|
40 |
+
The plot below shows a sample set of contracts that have been automatically grouped together.
|
41 |
+
Each point in the plot represents how the model interprets a contract, the closer together a pair of points are, the more similar they appear to the model.
|
42 |
+
\n**TIP:** Hover over each point to see the filename of the contract. Groups can be added or removed by clicking on the symbol in the plot legend.
|
43 |
+
""")
|
44 |
+
st.write("**π Upload your own contracts on the left (as .txt files)** and hit the button **Organise Data** to see how your own contracts can be grouped together")
|
45 |
+
|
46 |
+
@st.cache(allow_output_mutation=True)
def load_model():
    """Fetch the serialized model from the Hugging Face Hub and load it.

    The file named by ``MODEL_FILENAME`` is downloaded from ``MODEL_REPO_ID``
    (authenticating with ``HF_TOKEN``) and deserialized with ``joblib``.
    The call is wrapped in ``st.cache`` with ``allow_output_mutation=True``.

    Returns:
        The object stored in the downloaded file.
    """
    # NOTE(security): joblib.load unpickles the file, and unpickling can run
    # arbitrary code. Only point MODEL_REPO_ID at a repository you trust.
    model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=MODEL_FILENAME,
        token=HF_TOKEN,
    )
    return joblib.load(model_path)
|
52 |
+
|
53 |
+
@st.cache(allow_output_mutation=True)
def load_dataset():
    """Download the sample dataset repository and read it into a DataFrame.

    A snapshot of the ``DATA_REPO_ID`` dataset repository is downloaded
    (authenticating with ``HF_TOKEN``), then ``DATA_FILENAME`` is parsed as
    JSON with pandas. The call is wrapped in ``st.cache`` with
    ``allow_output_mutation=True``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_json``.
    """
    # The snapshot is written to the current working directory ('./'), which
    # is why DATA_FILENAME can be opened below as a bare relative path.
    snapshot_download(
        repo_id=DATA_REPO_ID,
        repo_type='dataset',
        local_dir='./',
        token=HF_TOKEN,
    )
    return pd.read_json(DATA_FILENAME)
|
58 |
+
|
59 |
+
def get_transform_and_predictions(model, df, *, max_chars=500):
    """Compute cluster predictions and embedded coordinates for each document.

    Args:
        model: Sliceable pipeline-like object. ``model.predict`` is called on
            the truncated texts, and ``model[:2].transform`` is called on the
            same texts to obtain the coordinates.
        df: DataFrame with a ``'text'`` column holding one string per document.
        max_chars: Number of leading characters of each text passed to the
            model. Defaults to 500, the value that was previously hard-coded.
            ``None`` disables truncation.

    Returns:
        A ``(X_transform, y)`` tuple: the output of ``model[:2].transform``
        and the output of ``model.predict``, in that order.
    """
    # Only the head of each document is used as model input.
    X = [text[:max_chars] for text in df['text'].to_list()]
    y = model.predict(X)
    # NOTE(review): predict() presumably repeats the first two steps
    # internally, so the texts are transformed twice. Confirm before
    # optimising.
    X_transform = model[:2].transform(X)
    return X_transform, y
|
64 |
+
|
65 |
+
# Load the model and the sample contracts while a spinner is shown.
with st.spinner('βοΈ Loading model...'):
    cuad_tfidf_umap_kmeans = load_model()
    cuad_df = load_dataset()

# Coordinates for plotting plus one predicted group per contract.
X_transform, y = get_transform_and_predictions(cuad_tfidf_umap_kmeans, cuad_df)

# The first three columns of the transformed data are the plot axes.
# Group ids are converted to strings before being used as the colour key.
fig = px.scatter_3d(
    x=X_transform[:, 0],
    y=X_transform[:, 1],
    z=X_transform[:, 2],
    color=list(map(str, y)),
    hover_name=cuad_df['filename'].to_list(),
)

# Anchor the legend inside the top-left corner of the figure.
legend_options = {
    "title": 'grouping',
    "yanchor": "top",
    "y": 0.99,
    "xanchor": "left",
    "x": 0.01,
}
fig.update_layout(legend=legend_options, width=1100, height=900)

st.plotly_chart(fig, use_container_width=True, height=1600)
|
89 |
|
90 |
add_email_signup_form()
|
91 |
|