cpv_3.1_eval_pipeline

Sleeping

App Files Files Community

mtyrrell committed on Aug 7, 2024

Commit

b798c40

verified ·

1 Parent(s): ceedd37

Update appStore/rag.py

Browse files

Files changed (1) hide show

appStore/rag.py +3 -177

appStore/rag.py CHANGED Viewed

@@ -1,106 +1,4 @@
-# import os
-# # import json
-# import numpy as np
-# import pandas as pd
-# import openai
-# from haystack.schema import Document
-# import streamlit as st
-# from tenacity import retry, stop_after_attempt, wait_random_exponential
-# # Get openai API key
-# # openai.api_key = os.environ["OPENAI_API_KEY"]
-# hf_token = os.environ["HF_API_KEY"]
-# #model_select = "gpt-3.5-turbo-0125"
-# model_select ="gpt-4"
-# # define a special function for putting the prompt together (as we can't use haystack)
-# def get_prompt(context, label):
-#   base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
-#   Summarize only elements of the context that address vulnerability of "+label+" to climate change. \
-#   If there is no mention of "+label+" in the context, return nothing. \
-#   Formatting example: \
-#     - Bullet point 1 \
-#     - Bullet point 2 \
-# "
-#   # Add the meta data for references
-#   # context = ' - '.join([d.content for d in docs])
-#   prompt = base_prompt+"; Context: "+context+"; Answer:"
-#   return prompt
-# # def get_prompt(context, label):
-# #   base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
-# #   Summarize only elements of the context that address vulnerability to climate change. \
-# #   Formatting example: \
-# #     - Bullet point 1 \
-# #     - Bullet point 2 \
-# # "
-# #   # Add the meta data for references
-# #   # context = ' - '.join([d.content for d in docs])
-# #   prompt = base_prompt+"; Context: "+context+"; Answer:"
-# #   return prompt
-# #   base_prompt="Summarize the following context efficiently in bullet points, the less the better- but keep concrete goals. \
-# #   Summarize only activities that address the vulnerability of "+label+" to climate change. \
-# #   Formatting example: \
-# #     - Collect and utilize gender-disaggregated data to inform and improve climate change adaptation efforts. \
-# #     - Prioritize gender sensitivity in adaptation options, ensuring participation and benefits for women, who are more vulnerable to climate impacts. \
-# # "
-# # # convert df rows to Document object so we can feed it into the summarizer easily
-# # def get_document(df):
-# #     # we take a list of each extract
-# #     ls_dict = []
-# #     for index, row in df.iterrows():
-# #         # Create a Document object for each row (we only need the text)
-# #         doc = Document(
-# #             row['text'],
-# #             meta={
-# #             'label': row['Vulnerability Label']}
-# #         )
-# #         # Append the Document object to the documents list
-# #         ls_dict.append(doc)
-# #     return ls_dict
-# # exception handling for issuing multiple API calls to openai (exponential backoff)
-# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-# def completion_with_backoff(**kwargs):
-#     return openai.ChatCompletion.create(**kwargs)
-# # construct RAG query, send to openai and process response
-# def run_query(context, label):
-#     '''
-#     For non-streamed completion, enable the following 2 lines and comment out the code below
-#     '''
-#     # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
-#     # result = res.choices[0].message.content
-#     # instantiate ChatCompletion as a generator object (stream is set to True)
-#     response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(context, label)}], stream=True)
-#     # iterate through the streamed output
-#     report = []
-#     res_box = st.empty()
-#     for chunk in response:
-#         # extract the object containing the text (totally different structure when streaming)
-#         chunk_message = chunk['choices'][0]['delta']
-#         # test to make sure there is text in the object (some don't have)
-#         if 'content' in chunk_message:
-#             report.append(chunk_message.content) # extract the message
-#             # add the latest text and merge it with all previous
-#             result = "".join(report).strip()
-#             # res_box.success(result) # output to response text box
-#             res_box.success(result)
 import os
-# import json
 import numpy as np
 import pandas as pd
 import openai
@@ -121,99 +19,27 @@ def get_prompt(context, label):
   If there is no mention of "+label+" in the context, return nothing. \
   Formatting example: \
     - Bullet point 1 \
-    - Bullet point 2 \
-"
-  # Add the meta data for references
-  # context = ' - '.join([d.content for d in docs])
   prompt = base_prompt+"; Context: "+context+"; Answer:"
   return prompt
 # # exception handling for issuing multiple API calls to openai (exponential backoff)
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 # def completion_with_backoff(**kwargs):
 #     return openai.ChatCompletion.create(**kwargs)
-def get_prompt(context, label):
-  base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
-  Summarize only elements of the context that address vulnerability of "+label+" to climate change. \
-  If there is no mention of "+label+" in the context, return nothing. \
-  Do not include an introduction sentence, just the bullet points as per below. \
-  Formatting example: \
-    - Bullet point 1 \
-    - Bullet point 2 \
-"
-  # Add the meta data for references
-  # context = ' - '.join([d.content for d in docs])
-  prompt = base_prompt+"; Context: "+context+"; Answer:"
-  return prompt
-# # construct RAG query, send to openai and process response
-# def run_query(context, label, chatbot_role):
-#     '''
-#     For non-streamed completion, enable the following 2 lines and comment out the code below
-#     '''
-#     # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
-#     # result = res.choices[0].message.content
-#     messages = [
-#       ChatMessage(role="system", content=chatbot_role),
-#       ChatMessage(role="user", content=get_prompt(context, label)),
-#     ]
-#     response = llm.chat(messages)
-#     return(response)
-# tokenizer = AutoTokenizer.from_pretrained(
-#     "meta-llama/Meta-Llama-3.1-8B-Instruct",
-#     token=hf_token,
-# )
-# stopping_ids = [
-#     tokenizer.eos_token_id,
-#     tokenizer.convert_tokens_to_ids("<|eot_id|>"),
-# ]
-# Define the role of the chatbot
-# chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""
-# construct RAG query, send to openai and process response
 def run_query(context, label):
     '''
     For non-streamed completion, enable the following 2 lines and comment out the code below
     '''
     chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""
     messages = [{"role": "system", "content": chatbot_role},{"role": "user", "content": get_prompt(context, label)}]
-    # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
-    # result = res.choices[0].message.content
     # Initialize the client, pointing it to one of the available models
-    client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token = hf_token)
-    # response = client.chat.completions.create(
-    #     model="meta-llama/Meta-Llama-3.1-8B-Instruct",
-    #     messages=[
-    #       ChatMessage(role="system", content=chatbot_role),
-    #       ChatMessage(role="user", content=get_prompt(context, label)),
-    #     ],
-    #     stream=True,
-    #     max_tokens=500
-    # )
-    # iterate and print stream
-    # for message in chat_completion:
-    #     print(message.choices[0].delta.content, end="")
     # instantiate ChatCompletion as a generator object (stream is set to True)
     # response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(context, label)}], stream=True)

 import os
 import numpy as np
 import pandas as pd
 import openai
   If there is no mention of "+label+" in the context, return nothing. \
   Formatting example: \
     - Bullet point 1 \
+    - Bullet point 2 "
   prompt = base_prompt+"; Context: "+context+"; Answer:"
   return prompt
 # # exception handling for issuing multiple API calls to openai (exponential backoff)
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 # def completion_with_backoff(**kwargs):
 #     return openai.ChatCompletion.create(**kwargs)
+# construct query, send to HF API and process response
 def run_query(context, label):
     '''
     For non-streamed completion, enable the following 2 lines and comment out the code below
     '''
     chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""
     messages = [{"role": "system", "content": chatbot_role},{"role": "user", "content": get_prompt(context, label)}]
     # Initialize the client, pointing it to one of the available models
+    client = InferenceClient("meta-llama/Meta-Llama-3.1-8B-Instruct", token = hf_token)
     # instantiate ChatCompletion as a generator object (stream is set to True)
     # response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(context, label)}], stream=True)