jmuscatello committed on
Commit
916cbfe
·
1 Parent(s): 0f220f8

Add organise static demo

Browse files
Files changed (1) hide show
  1. pages/5_🗂_Organise_Demo.py +64 -1
pages/5_🗂_Organise_Demo.py CHANGED
@@ -1,9 +1,21 @@
1
  import os
 
 
 
 
 
 
2
 
3
  import streamlit as st
4
  import streamlit_analytics
5
  from utils import add_logo_to_sidebar, add_footer, add_email_signup_form
6
 
 
 
 
 
 
 
7
  streamlit_analytics.start_tracking()
8
 
9
  st.set_page_config(
@@ -22,7 +34,58 @@ add_logo_to_sidebar()
22
  st.sidebar.success("👆 Select a demo above.")
23
 
24
  st.title('🗂 Organise Demo')
25
- st.markdown("🏗 This demo is under construction. Please visit back soon.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  add_email_signup_form()
28
 
 
1
  import os
2
+ import joblib
3
+
4
+ import pandas as pd
5
+ import plotly.express as px
6
+
7
+ from huggingface_hub import hf_hub_download, snapshot_download
8
 
9
  import streamlit as st
10
  import streamlit_analytics
11
  from utils import add_logo_to_sidebar, add_footer, add_email_signup_form
12
 
13
+ HF_TOKEN = os.environ.get("HF_TOKEN")
14
+ MODEL_REPO_ID = "simplexico/cuad-sklearn-contract-clustering"
15
+ DATA_REPO_ID = "simplexico/cuad-top-ten"
16
+ MODEL_FILENAME = "cuad_tfidf_umap_kmeans.pkl"
17
+ DATA_FILENAME = "cuad_top_ten_popular_contract_types.json"
18
+
19
  streamlit_analytics.start_tracking()
20
 
21
  st.set_page_config(
 
34
  st.sidebar.success("👆 Select a demo above.")
35
 
36
  st.title('🗂 Organise Demo')
37
+ st.write("""
38
+ This demo shows how AI can be used to organise contracts.
39
+ We've trained a model to group contracts into similar types.
40
+ The plot below shows a sample set of contracts that have been automatically grouped together.
41
+ Each point in the plot represents how the model interprets a contract, the closer together a pair of points are, the more similar they appear to the model.
42
+ \n**TIP:** Hover over each point to see the filename of the contract. Groups can be added or removed by clicking on the symbol in the plot legend.
43
+ """)
44
+ st.write("**👈 Upload your own contracts on the left (as .txt files)** and hit the button **Organise Data** to see how your own contracts can be grouped together")
45
+
46
+ @st.cache(allow_output_mutation=True)
47
+ def load_model():
48
+ model = joblib.load(
49
+ hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME, token=HF_TOKEN)
50
+ )
51
+ return model
52
+
53
+ @st.cache(allow_output_mutation=True)
54
+ def load_dataset():
55
+ snapshot_download(repo_id=DATA_REPO_ID, token=HF_TOKEN, local_dir='./', repo_type='dataset')
56
+ df = pd.read_json(DATA_FILENAME)
57
+ return df
58
+
59
+ def get_transform_and_predictions(model, df):
60
+ X = [text[:500] for text in df['text'].to_list()]
61
+ y = model.predict(X)
62
+ X_transform = model[:2].transform(X)
63
+ return X_transform, y
64
+
65
+ with st.spinner('⚙️ Loading model...'):
66
+ cuad_tfidf_umap_kmeans = load_model()
67
+ cuad_df = load_dataset()
68
+
69
+ X_transform, y = get_transform_and_predictions(cuad_tfidf_umap_kmeans, cuad_df)
70
+
71
+ fig = px.scatter_3d(
72
+ x=X_transform[:,0],
73
+ y=X_transform[:,1],
74
+ z=X_transform[:,2],
75
+ color=[str(y_i) for y_i in y], hover_name=cuad_df['filename'].to_list())
76
+
77
+ fig.update_layout(
78
+ legend=dict(
79
+ title='grouping',
80
+ yanchor="top",
81
+ y=0.99,
82
+ xanchor="left",
83
+ x=0.01
84
+ ),
85
+ width=1100,
86
+ height=900
87
+ )
88
+ st.plotly_chart(fig, use_container_width=True, height=1600)
89
 
90
  add_email_signup_form()
91