Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	v2.1 added RAG summary by group
Browse files- .DS_Store +0 -0
- .gitignore +1 -0
- app.py +29 -29
- appStore/__pycache__/__init__.cpython-310.pyc +0 -0
- appStore/__pycache__/doc_processing.cpython-310.pyc +0 -0
- appStore/__pycache__/rag.cpython-310.pyc +0 -0
- appStore/__pycache__/target.cpython-310.pyc +0 -0
- appStore/__pycache__/vulnerability_analysis.cpython-310.pyc +0 -0
- appStore/rag.py +86 -0
- appStore/target.py +35 -1
- requirements.txt +4 -1
- utils/__pycache__/__init__.cpython-310.pyc +0 -0
- utils/__pycache__/config.cpython-310.pyc +0 -0
- utils/__pycache__/preprocessing.cpython-310.pyc +0 -0
- utils/__pycache__/target_classifier.cpython-310.pyc +0 -0
- utils/__pycache__/uploadAndExample.cpython-310.pyc +0 -0
- utils/__pycache__/vulnerability_classifier.cpython-310.pyc +0 -0
    	
        .DS_Store
    ADDED
    
    | Binary file (8.2 kB). View file | 
|  | 
    	
        .gitignore
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            civ_v2/
         | 
    	
        app.py
    CHANGED
    
    | @@ -2,36 +2,36 @@ import streamlit as st | |
| 2 | 
             
            import os
         | 
| 3 | 
             
            import pkg_resources
         | 
| 4 |  | 
| 5 | 
            -
            # Using this wacky hack to get around the massively ridiculous managed env loading order
         | 
| 6 | 
            -
            def is_installed(package_name, version):
         | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
            # shifted from below - this must be the first streamlit call; otherwise: problems
         | 
| 14 | 
            -
            st.set_page_config(page_title = 'Vulnerability Analysis', 
         | 
| 15 | 
            -
             | 
| 16 | 
            -
             | 
| 17 | 
            -
            @st.cache_resource # cache the function so it's not called every time app.py is triggered
         | 
| 18 | 
            -
            def install_packages():
         | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 |  | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 |  | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 |  | 
| 30 | 
            -
             | 
| 31 | 
            -
             | 
| 32 |  | 
| 33 | 
            -
            # install packages if necessary
         | 
| 34 | 
            -
            install_packages()
         | 
| 35 |  | 
| 36 | 
             
            import appStore.vulnerability_analysis as vulnerability_analysis
         | 
| 37 | 
             
            import appStore.target as target_analysis
         | 
| @@ -41,8 +41,8 @@ from utils.vulnerability_classifier import label_dict | |
| 41 | 
             
            import pandas as pd
         | 
| 42 | 
             
            import plotly.express as px
         | 
| 43 |  | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 |  | 
| 47 | 
             
            with st.sidebar:
         | 
| 48 | 
             
                # upload and example doc
         | 
| @@ -54,7 +54,7 @@ with st.sidebar: | |
| 54 | 
             
                add_upload(choice) 
         | 
| 55 |  | 
| 56 | 
             
            with st.container():
         | 
| 57 | 
            -
                st.markdown("<h2 style='text-align: center; | 
| 58 | 
             
                st.write(' ')
         | 
| 59 |  | 
| 60 | 
             
            with st.expander("ℹ️ - About this app", expanded=False):
         | 
|  | |
| 2 | 
             
            import os
         | 
| 3 | 
             
            import pkg_resources
         | 
| 4 |  | 
| 5 | 
            +
            # # Using this wacky hack to get around the massively ridiculous managed env loading order
         | 
| 6 | 
            +
            # def is_installed(package_name, version):
         | 
| 7 | 
            +
            #     try:
         | 
| 8 | 
            +
            #         pkg = pkg_resources.get_distribution(package_name)
         | 
| 9 | 
            +
            #         return pkg.version == version
         | 
| 10 | 
            +
            #     except pkg_resources.DistributionNotFound:
         | 
| 11 | 
            +
            #         return False
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            # # shifted from below - this must be the first streamlit call; otherwise: problems
         | 
| 14 | 
            +
            # st.set_page_config(page_title = 'Vulnerability Analysis', 
         | 
| 15 | 
            +
            #                    initial_sidebar_state='expanded', layout="wide") 
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            # @st.cache_resource # cache the function so it's not called every time app.py is triggered
         | 
| 18 | 
            +
            # def install_packages():
         | 
| 19 | 
            +
            #     install_commands = []
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            #     if not is_installed("spaces", "0.12.0"):
         | 
| 22 | 
            +
            #         install_commands.append("pip install spaces==0.17.0")
         | 
| 23 |  | 
| 24 | 
            +
            #     if not is_installed("pydantic", "1.8.2"):
         | 
| 25 | 
            +
            #         install_commands.append("pip install pydantic==1.8.2")
         | 
| 26 |  | 
| 27 | 
            +
            #     if not is_installed("typer", "0.4.0"):
         | 
| 28 | 
            +
            #         install_commands.append("pip install typer==0.4.0")
         | 
| 29 |  | 
| 30 | 
            +
            #     if install_commands:
         | 
| 31 | 
            +
            #         os.system(" && ".join(install_commands))
         | 
| 32 |  | 
| 33 | 
            +
            # # install packages if necessary
         | 
| 34 | 
            +
            # install_packages()
         | 
| 35 |  | 
| 36 | 
             
            import appStore.vulnerability_analysis as vulnerability_analysis
         | 
| 37 | 
             
            import appStore.target as target_analysis
         | 
|  | |
| 41 | 
             
            import pandas as pd
         | 
| 42 | 
             
            import plotly.express as px
         | 
| 43 |  | 
| 44 | 
            +
            st.set_page_config(page_title = 'Vulnerability Analysis', 
         | 
| 45 | 
            +
                              initial_sidebar_state='expanded', layout="wide") 
         | 
| 46 |  | 
| 47 | 
             
            with st.sidebar:
         | 
| 48 | 
             
                # upload and example doc
         | 
|  | |
| 54 | 
             
                add_upload(choice) 
         | 
| 55 |  | 
| 56 | 
             
            with st.container():
         | 
| 57 | 
            +
                st.markdown("<h2 style='text-align: center;'> Vulnerability Analysis 2.0 </h2>", unsafe_allow_html=True)
         | 
| 58 | 
             
                st.write(' ')
         | 
| 59 |  | 
| 60 | 
             
            with st.expander("ℹ️ - About this app", expanded=False):
         | 
    	
        appStore/__pycache__/__init__.cpython-310.pyc
    ADDED
    
    | Binary file (154 Bytes). View file | 
|  | 
    	
        appStore/__pycache__/doc_processing.cpython-310.pyc
    ADDED
    
    | Binary file (3.18 kB). View file | 
|  | 
    	
        appStore/__pycache__/rag.cpython-310.pyc
    ADDED
    
    | Binary file (1.81 kB). View file | 
|  | 
    	
        appStore/__pycache__/target.cpython-310.pyc
    ADDED
    
    | Binary file (2.8 kB). View file | 
|  | 
    	
        appStore/__pycache__/vulnerability_analysis.cpython-310.pyc
    ADDED
    
    | Binary file (4.78 kB). View file | 
|  | 
    	
        appStore/rag.py
    ADDED
    
    | @@ -0,0 +1,86 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
            # import json
         | 
| 3 | 
            +
            import numpy as np
         | 
| 4 | 
            +
            import pandas as pd
         | 
| 5 | 
            +
            import openai
         | 
| 6 | 
            +
            from haystack.schema import Document
         | 
| 7 | 
            +
            import streamlit as st
         | 
| 8 | 
            +
            from tenacity import retry, stop_after_attempt, wait_random_exponential
         | 
| 9 | 
            +
             | 
| 10 | 
            +
             | 
| 11 | 
            +
            # Get openai API key
         | 
| 12 | 
            +
            openai.api_key = os.environ["OPENAI_API_KEY"]
         | 
| 13 | 
            +
            model_select = "gpt-3.5-turbo-1106"
         | 
| 14 | 
            +
             | 
| 15 | 
            +
             | 
| 16 | 
            +
            # define a special function for putting the prompt together (as we can't use haystack)
         | 
| 17 | 
            +
            def get_prompt(context):
         | 
| 18 | 
            +
              base_prompt="Summarize the following context efficiently in bullet points, the less the better. \
         | 
| 19 | 
            +
              Summarize only activities that address the vulnerability of the given context to climate change. \
         | 
| 20 | 
            +
              Formatting example: \
         | 
| 21 | 
            +
                - Collect and utilize gender-disaggregated data to inform and improve climate change adaptation efforts. \
         | 
| 22 | 
            +
                - Prioritize gender sensitivity in adaptation options, ensuring participation and benefits for women, who are more vulnerable to climate impacts. \
         | 
| 23 | 
            +
            "
         | 
| 24 | 
            +
             | 
| 25 | 
            +
              # Add the meta data for references
         | 
| 26 | 
            +
              # context = ' - '.join([d.content for d in docs])
         | 
| 27 | 
            +
              prompt = base_prompt+"; Context: "+context+"; Answer:"
         | 
| 28 | 
            +
              
         | 
| 29 | 
            +
              return prompt
         | 
| 30 | 
            +
             | 
| 31 | 
            +
             | 
| 32 | 
            +
            # # convert df rows to Document object so we can feed it into the summarizer easily
         | 
| 33 | 
            +
            # def get_document(df):
         | 
| 34 | 
            +
            #     # we take a list of each extract
         | 
| 35 | 
            +
            #     ls_dict = []
         | 
| 36 | 
            +
            #     for index, row in df.iterrows():
         | 
| 37 | 
            +
            #         # Create a Document object for each row (we only need the text)
         | 
| 38 | 
            +
            #         doc = Document(
         | 
| 39 | 
            +
            #             row['text'],
         | 
| 40 | 
            +
            #             meta={
         | 
| 41 | 
            +
            #             'label': row['Vulnerability Label']}
         | 
| 42 | 
            +
            #         )
         | 
| 43 | 
            +
            #         # Append the Document object to the documents list
         | 
| 44 | 
            +
            #         ls_dict.append(doc)
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            #     return ls_dict 
         | 
| 47 | 
            +
             | 
| 48 | 
            +
             | 
| 49 | 
            +
            # exception handling for issuing multiple API calls to openai (exponential backoff)
         | 
| 50 | 
            +
            @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
         | 
| 51 | 
            +
            def completion_with_backoff(**kwargs):
         | 
| 52 | 
            +
                return openai.ChatCompletion.create(**kwargs)
         | 
| 53 | 
            +
             | 
| 54 | 
            +
             | 
| 55 | 
            +
            # construct RAG query, send to openai and process response
         | 
| 56 | 
            +
            def run_query(df):
         | 
| 57 | 
            +
                docs = df
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                '''
         | 
| 60 | 
            +
                For non-streamed completion, enable the following 2 lines and comment out the code below
         | 
| 61 | 
            +
                '''
         | 
| 62 | 
            +
                # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
         | 
| 63 | 
            +
                # result = res.choices[0].message.content
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                # instantiate ChatCompletion as a generator object (stream is set to True)
         | 
| 66 | 
            +
                response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}], stream=True)
         | 
| 67 | 
            +
                # iterate through the streamed output
         | 
| 68 | 
            +
                report = []
         | 
| 69 | 
            +
                res_box = st.empty()
         | 
| 70 | 
            +
                for chunk in response:
         | 
| 71 | 
            +
                    # extract the object containing the text (totally different structure when streaming)
         | 
| 72 | 
            +
                    chunk_message = chunk['choices'][0]['delta']
         | 
| 73 | 
            +
                    # test to make sure there is text in the object (some chunks don't have any)
         | 
| 74 | 
            +
                    if 'content' in chunk_message:
         | 
| 75 | 
            +
                        report.append(chunk_message.content) # extract the message
         | 
| 76 | 
            +
                        # add the latest text and merge it with all previous
         | 
| 77 | 
            +
                        result = "".join(report).strip()
         | 
| 78 | 
            +
                        # res_box.success(result) # output to response text box
         | 
| 79 | 
            +
                        res_box.success(result)
         | 
| 80 | 
            +
             | 
| 81 | 
            +
             | 
| 82 | 
            +
             | 
| 83 | 
            +
             | 
| 84 | 
            +
             | 
| 85 | 
            +
             | 
| 86 | 
            +
             | 
    	
        appStore/target.py
    CHANGED
    
    | @@ -17,6 +17,7 @@ from io import BytesIO | |
| 17 | 
             
            import xlsxwriter
         | 
| 18 | 
             
            import plotly.express as px
         | 
| 19 | 
             
            from utils.target_classifier import label_dict
         | 
|  | |
| 20 |  | 
| 21 | 
             
            # Declare all the necessary variables
         | 
| 22 | 
             
            classifier_identifier = 'target'
         | 
| @@ -82,7 +83,40 @@ def app(): | |
| 82 |  | 
| 83 | 
             
            def target_display(): 
         | 
| 84 |  | 
|  | |
|  | |
| 85 | 
             
                # Assign dataframe a name
         | 
| 86 | 
             
                df = st.session_state['key2']
         | 
| 87 | 
            -
             | 
| 88 | 
             
                st.write(df)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 17 | 
             
            import xlsxwriter
         | 
| 18 | 
             
            import plotly.express as px
         | 
| 19 | 
             
            from utils.target_classifier import label_dict
         | 
| 20 | 
            +
            from appStore.rag import run_query
         | 
| 21 |  | 
| 22 | 
             
            # Declare all the necessary variables
         | 
| 23 | 
             
            classifier_identifier = 'target'
         | 
|  | |
| 83 |  | 
| 84 | 
             
            def target_display(): 
         | 
| 85 |  | 
| 86 | 
            +
                ### TABLE Output ###
         | 
| 87 | 
            +
             | 
| 88 | 
             
                # Assign dataframe a name
         | 
| 89 | 
             
                df = st.session_state['key2']
         | 
|  | |
| 90 | 
             
                st.write(df)
         | 
| 91 | 
            +
             | 
| 92 | 
            +
                ### RAG Output by group ##
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                # Expand the DataFrame
         | 
| 95 | 
            +
                df_expand = df.explode('Vulnerability Label')
         | 
| 96 | 
            +
                # Group by 'Vulnerability Label' and concatenate 'text'
         | 
| 97 | 
            +
                df_agg = df_expand.groupby('Vulnerability Label')['text'].agg('; '.join).reset_index()
         | 
| 98 | 
            +
             | 
| 99 | 
            +
                st.markdown("----")
         | 
| 100 | 
            +
                st.markdown('**DOCUMENT FINDINGS SUMMARY BY VULNERABILITY LABEL:**')
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                # construct RAG query for each label, send to openai and process response
         | 
| 103 | 
            +
                for i in range(0,len(df_agg)):
         | 
| 104 | 
            +
                    st.write(df_agg['Vulnerability Label'].iloc[i])
         | 
| 105 | 
            +
                    run_query(df_agg['text'].iloc[i])
         | 
| 106 | 
            +
                    # st.write(df_agg['text'].iloc[i])
         | 
| 107 | 
            +
             | 
| 108 | 
            +
             | 
| 109 | 
            +
             | 
| 110 | 
            +
             | 
| 111 | 
            +
             | 
| 112 | 
            +
             | 
| 113 | 
            +
             | 
| 114 | 
            +
             | 
| 115 | 
            +
             | 
| 116 | 
            +
             | 
| 117 | 
            +
             | 
| 118 | 
            +
             | 
| 119 | 
            +
             | 
| 120 | 
            +
             | 
| 121 | 
            +
             | 
| 122 | 
            +
             | 
    	
        requirements.txt
    CHANGED
    
    | @@ -19,4 +19,7 @@ altair==4.0 | |
| 19 | 
             
            streamlit-aggrid
         | 
| 20 | 
             
            python-docx
         | 
| 21 | 
             
            setfit
         | 
| 22 | 
            -
            plotly.express
         | 
|  | |
|  | |
|  | 
|  | |
| 19 | 
             
            streamlit-aggrid
         | 
| 20 | 
             
            python-docx
         | 
| 21 | 
             
            setfit
         | 
| 22 | 
            +
            plotly.express
         | 
| 23 | 
            +
            openai==0.27.9
         | 
| 24 | 
            +
            pydantic==1.8.2
         | 
| 25 | 
            +
            scikit-learn==1.0.2
         | 
    	
        utils/__pycache__/__init__.cpython-310.pyc
    ADDED
    
    | Binary file (151 Bytes). View file | 
|  | 
    	
        utils/__pycache__/config.cpython-310.pyc
    ADDED
    
    | Binary file (1.1 kB). View file | 
|  | 
    	
        utils/__pycache__/preprocessing.cpython-310.pyc
    ADDED
    
    | Binary file (9.07 kB). View file | 
|  | 
    	
        utils/__pycache__/target_classifier.cpython-310.pyc
    ADDED
    
    | Binary file (3.6 kB). View file | 
|  | 
    	
        utils/__pycache__/uploadAndExample.cpython-310.pyc
    ADDED
    
    | Binary file (1.22 kB). View file | 
|  | 
    	
        utils/__pycache__/vulnerability_classifier.cpython-310.pyc
    ADDED
    
    | Binary file (4.39 kB). View file | 
|  | 
