cpv_3.1_eval_pipeline

Sleeping

App Files Files Community

mtyrrell committed on Jan 20, 2024

Commit

b125eed

1 Parent(s): aa9a3c7

v2.1 added RAG summary by group

Browse files

Files changed (17) hide show

.DS_Store +0 -0
.gitignore +1 -0
app.py +29 -29
appStore/__pycache__/__init__.cpython-310.pyc +0 -0
appStore/__pycache__/doc_processing.cpython-310.pyc +0 -0
appStore/__pycache__/rag.cpython-310.pyc +0 -0
appStore/__pycache__/target.cpython-310.pyc +0 -0
appStore/__pycache__/vulnerability_analysis.cpython-310.pyc +0 -0
appStore/rag.py +86 -0
appStore/target.py +35 -1
requirements.txt +4 -1
utils/__pycache__/__init__.cpython-310.pyc +0 -0
utils/__pycache__/config.cpython-310.pyc +0 -0
utils/__pycache__/preprocessing.cpython-310.pyc +0 -0
utils/__pycache__/target_classifier.cpython-310.pyc +0 -0
utils/__pycache__/uploadAndExample.cpython-310.pyc +0 -0
utils/__pycache__/vulnerability_classifier.cpython-310.pyc +0 -0

.DS_Store ADDED Viewed

Binary file (8.2 kB). View file

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ civ_v2/

app.py CHANGED Viewed

@@ -2,36 +2,36 @@ import streamlit as st
 import os
 import pkg_resources
-# Using this wacky hack to get around the massively ridiculous managed env loading order
-def is_installed(package_name, version):
-    try:
-        pkg = pkg_resources.get_distribution(package_name)
-        return pkg.version == version
-    except pkg_resources.DistributionNotFound:
-        return False
-# shifted from below - this must be the first streamlit call; otherwise: problems
-st.set_page_config(page_title = 'Vulnerability Analysis',
-                   initial_sidebar_state='expanded', layout="wide")
-@st.cache_resource # cache the function so it's not called every time app.py is triggered
-def install_packages():
-    install_commands = []
-    if not is_installed("spaces", "0.12.0"):
-        install_commands.append("pip install spaces==0.17.0")
-    if not is_installed("pydantic", "1.8.2"):
-        install_commands.append("pip install pydantic==1.8.2")
-    if not is_installed("typer", "0.4.0"):
-        install_commands.append("pip install typer==0.4.0")
-    if install_commands:
-        os.system(" && ".join(install_commands))
-# install packages if necessary
-install_packages()
 import appStore.vulnerability_analysis as vulnerability_analysis
 import appStore.target as target_analysis
@@ -41,8 +41,8 @@ from utils.vulnerability_classifier import label_dict
 import pandas as pd
 import plotly.express as px
-#st.set_page_config(page_title = 'Vulnerability Analysis',
- #                  initial_sidebar_state='expanded', layout="wide")
 with st.sidebar:
     # upload and example doc
@@ -54,7 +54,7 @@ with st.sidebar:
     add_upload(choice)
 with st.container():
-    st.markdown("<h2 style='text-align: center; color: black;'> Vulnerability Analysis 2.0 </h2>", unsafe_allow_html=True)
     st.write(' ')
 with st.expander("ℹ️ - About this app", expanded=False):

 import os
 import pkg_resources
+# # Using this wacky hack to get around the massively ridiculous managed env loading order
+# def is_installed(package_name, version):
+#     try:
+#         pkg = pkg_resources.get_distribution(package_name)
+#         return pkg.version == version
+#     except pkg_resources.DistributionNotFound:
+#         return False
+# # shifted from below - this must be the first streamlit call; otherwise: problems
+# st.set_page_config(page_title = 'Vulnerability Analysis',
+#                    initial_sidebar_state='expanded', layout="wide")
+# @st.cache_resource # cache the function so it's not called every time app.py is triggered
+# def install_packages():
+#     install_commands = []
+#     if not is_installed("spaces", "0.12.0"):
+#         install_commands.append("pip install spaces==0.17.0")
+#     if not is_installed("pydantic", "1.8.2"):
+#         install_commands.append("pip install pydantic==1.8.2")
+#     if not is_installed("typer", "0.4.0"):
+#         install_commands.append("pip install typer==0.4.0")
+#     if install_commands:
+#         os.system(" && ".join(install_commands))
+# # install packages if necessary
+# install_packages()
 import appStore.vulnerability_analysis as vulnerability_analysis
 import appStore.target as target_analysis
 import pandas as pd
 import plotly.express as px
+st.set_page_config(page_title = 'Vulnerability Analysis',
+                  initial_sidebar_state='expanded', layout="wide")
 with st.sidebar:
     # upload and example doc
     add_upload(choice)
 with st.container():
+    st.markdown("<h2 style='text-align: center;'> Vulnerability Analysis 2.0 </h2>", unsafe_allow_html=True)
     st.write(' ')
 with st.expander("ℹ️ - About this app", expanded=False):

appStore/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (154 Bytes). View file

appStore/__pycache__/doc_processing.cpython-310.pyc ADDED Viewed

Binary file (3.18 kB). View file

appStore/__pycache__/rag.cpython-310.pyc ADDED Viewed

Binary file (1.81 kB). View file

appStore/__pycache__/target.cpython-310.pyc ADDED Viewed

Binary file (2.8 kB). View file

appStore/__pycache__/vulnerability_analysis.cpython-310.pyc ADDED Viewed

Binary file (4.78 kB). View file

appStore/rag.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import os
+# import json
+import numpy as np
+import pandas as pd
+import openai
+from haystack.schema import Document
+import streamlit as st
+from tenacity import retry, stop_after_attempt, wait_random_exponential
+# Get openai API key
+openai.api_key = os.environ["OPENAI_API_KEY"]
+model_select = "gpt-3.5-turbo-1106"
+# define a special function for putting the prompt together (as we can't use haystack)
+def get_prompt(context):
+  """Build the summarization prompt that is sent to the chat model.
+
+  The result is the fixed instruction text, followed by "; Context: ",
+  the given context, and the closing "; Answer:" cue.
+
+  Args:
+    context: the text to summarize; it is concatenated with ``+``, so it
+      must be a str.
+
+  Returns:
+    The complete prompt string.
+  """
+  # NOTE: the backslash continuations sit inside the string literal, so the
+  # leading indentation of every continued line is part of the prompt text.
+  base_prompt="Summarize the following context efficiently in bullet points, the less the better. \
+  Summarize only activities that address the vulnerability of the given context to climate change. \
+  Formatting example: \
+    - Collect and utilize gender-disaggregated data to inform and improve climate change adaptation efforts. \
+    - Prioritize gender sensitivity in adaptation options, ensuring participation and benefits for women, who are more vulnerable to climate impacts. \
+"
+  # Add the meta data for references
+  # context = ' - '.join([d.content for d in docs])
+  prompt = base_prompt+"; Context: "+context+"; Answer:"
+  return prompt
+# # convert df rows to Document object so we can feed it into the summarizer easily
+# def get_document(df):
+#     # we take a list of each extract
+#     ls_dict = []
+#     for index, row in df.iterrows():
+#         # Create a Document object for each row (we only need the text)
+#         doc = Document(
+#             row['text'],
+#             meta={
+#             'label': row['Vulnerability Label']}
+#         )
+#         # Append the Document object to the documents list
+#         ls_dict.append(doc)
+#     return ls_dict
+# exception handling for issuing multiple API calls to openai (exponential backoff)
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+def completion_with_backoff(**kwargs):
+    return openai.ChatCompletion.create(**kwargs)
+# construct RAG query, send to openai and process response
+def run_query(df):
+    docs = df
+    '''
+    For non-streamed completion, enable the following 2 lines and comment out the code below
+    '''
+    # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
+    # result = res.choices[0].message.content
+    # instantiate ChatCompletion as a generator object (stream is set to True)
+    response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}], stream=True)
+    # iterate through the streamed output
+    report = []
+    res_box = st.empty()
+    for chunk in response:
+        # extract the object containing the text (totally different structure when streaming)
+        chunk_message = chunk['choices'][0]['delta']
+        # test to make sure there is text in the object (some don't have)
+        if 'content' in chunk_message:
+            report.append(chunk_message.content) # extract the message
+            # add the latest text and merge it with all previous
+            result = "".join(report).strip()
+            # res_box.success(result) # output to response text box
+            res_box.success(result)

appStore/target.py CHANGED Viewed

@@ -17,6 +17,7 @@ from io import BytesIO
 import xlsxwriter
 import plotly.express as px
 from utils.target_classifier import label_dict
 # Declare all the necessary variables
 classifier_identifier = 'target'
@@ -82,7 +83,40 @@ def app():
 def target_display():
     # Assign dataframe a name
     df = st.session_state['key2']
     st.write(df)

 import xlsxwriter
 import plotly.express as px
 from utils.target_classifier import label_dict
+from appStore.rag import run_query
 # Declare all the necessary variables
 classifier_identifier = 'target'
 def target_display():
+    ### TABLE Output ###
     # Assign dataframe a name
     df = st.session_state['key2']
     st.write(df)
+    ### RAG Output by group ##
+    # Expand the DataFrame
+    df_expand = df.explode('Vulnerability Label')
+    # Group by 'Vulnerability Label' and concatenate 'text'
+    df_agg = df_expand.groupby('Vulnerability Label')['text'].agg('; '.join).reset_index()
+    st.markdown("----")
+    st.markdown('**DOCUMENT FINDINGS SUMMARY BY VULNERABILITY LABEL:**')
+    # construct RAG query for each label, send to openai and process response
+    for i in range(0,len(df_agg)):
+        st.write(df_agg['Vulnerability Label'].iloc[i])
+        run_query(df_agg['text'].iloc[i])
+        # st.write(df_agg['text'].iloc[i])

requirements.txt CHANGED Viewed

@@ -19,4 +19,7 @@ altair==4.0
 streamlit-aggrid
 python-docx
 setfit
-plotly.express

 streamlit-aggrid
 python-docx
 setfit
+plotly.express
+openai==0.27.9
+pydantic==1.8.2
+scikit-learn==1.0.2

utils/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (151 Bytes). View file

utils/__pycache__/config.cpython-310.pyc ADDED Viewed

Binary file (1.1 kB). View file

utils/__pycache__/preprocessing.cpython-310.pyc ADDED Viewed

Binary file (9.07 kB). View file

utils/__pycache__/target_classifier.cpython-310.pyc ADDED Viewed

Binary file (3.6 kB). View file

utils/__pycache__/uploadAndExample.cpython-310.pyc ADDED Viewed

Binary file (1.22 kB). View file

utils/__pycache__/vulnerability_classifier.cpython-310.pyc ADDED Viewed

Binary file (4.39 kB). View file