Spaces:

simplexico
/

legal-ai-actions

Runtime error

App Files Files Community

jmuscatello committed on Apr 3, 2023

Commit

916cbfe

1 Parent(s): 0f220f8

Add organise static demo

Browse files

Files changed (1) hide show

pages/5_🗂_Organise_Demo.py +64 -1

pages/5_🗂_Organise_Demo.py CHANGED Viewed

@@ -1,9 +1,21 @@
 import os
 import streamlit as st
 import streamlit_analytics
 from utils import add_logo_to_sidebar, add_footer, add_email_signup_form
 streamlit_analytics.start_tracking()
 st.set_page_config(
@@ -22,7 +34,58 @@ add_logo_to_sidebar()
 st.sidebar.success("👆 Select a demo above.")
 st.title('🗂 Organise Demo')
-st.markdown("🏗 This demo is under construction. Please visit back soon.")
 add_email_signup_form()

 import os
+import joblib
+import pandas as pd
+import plotly.express as px
+from huggingface_hub import hf_hub_download, snapshot_download
 import streamlit as st
 import streamlit_analytics
 from utils import add_logo_to_sidebar, add_footer, add_email_signup_form
+# Hugging Face access token read from the environment (None when unset).
+HF_TOKEN = os.environ.get("HF_TOKEN")
+# Hub repository ids: one for the pickled model, one for the dataset.
+MODEL_REPO_ID = "simplexico/cuad-sklearn-contract-clustering"
+DATA_REPO_ID = "simplexico/cuad-top-ten"
+# File names looked up inside the repositories above.
+MODEL_FILENAME = "cuad_tfidf_umap_kmeans.pkl"
+DATA_FILENAME = "cuad_top_ten_popular_contract_types.json"
 streamlit_analytics.start_tracking()
 st.set_page_config(
 st.sidebar.success("👆 Select a demo above.")
 st.title('🗂 Organise Demo')
+st.write("""
+This demo shows how AI can be used to organise contracts.
+We've trained a model to group contracts into similar types.
+The plot below shows a sample set of contracts that have been automatically grouped together.
+Each point in the plot represents how the model interprets a contract, the closer together a pair of points are, the more similar they appear to the model.
+\n**TIP:** Hover over each point to see the filename of the contract. Groups can be added or removed by clicking on the symbol in the plot legend.
+""")
+st.write("**👈 Upload your own contracts on the left (as .txt files)** and hit the button **Organise Data** to see how your own contracts can be grouped together")
+@st.cache(allow_output_mutation=True)
+def load_model():
+    model = joblib.load(
+        hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME, token=HF_TOKEN)
+    )
+    return model
+@st.cache(allow_output_mutation=True)
+def load_dataset():
+    snapshot_download(repo_id=DATA_REPO_ID, token=HF_TOKEN, local_dir='./', repo_type='dataset')
+    df = pd.read_json(DATA_FILENAME)
+    return df
+def get_transform_and_predictions(model, df):
+    X = [text[:500] for text in df['text'].to_list()]
+    y = model.predict(X)
+    X_transform = model[:2].transform(X)
+    return X_transform, y
+# Load the model and data, compute the plot inputs and render the figure,
+# all while the spinner is displayed.
+with st.spinner('⚙️ Loading model...'):
+    cuad_tfidf_umap_kmeans = load_model()
+    cuad_df = load_dataset()
+    X_transform, y = get_transform_and_predictions(cuad_tfidf_umap_kmeans, cuad_df)
+    # One point per document: the first three columns of X_transform are the
+    # coordinates and the hover label is the document's filename.
+    # Labels are cast to str -- presumably so Plotly colours the groups as
+    # discrete categories rather than on a continuous scale; confirm.
+    fig = px.scatter_3d(
+        x=X_transform[:,0],
+        y=X_transform[:,1],
+        z=X_transform[:,2],
+        color=[str(y_i) for y_i in y], hover_name=cuad_df['filename'].to_list())
+    # Anchor the legend at the top-left corner of the figure.
+    fig.update_layout(
+        legend=dict(
+            title='grouping',
+            yanchor="top",
+            y=0.99,
+            xanchor="left",
+            x=0.01
+        ),
+        width=1100,
+        height=900
+    )
+    # NOTE(review): use_container_width=True is combined with the fixed
+    # width=1100 set above, and height=1600 here differs from the layout
+    # height=900 -- confirm which sizing is intended.
+    st.plotly_chart(fig, use_container_width=True, height=1600)
 add_email_signup_form()