cpv_3.1_eval_pipeline

Sleeping

App Files Files Community

mtyrrell committed on Aug 7, 2024

Commit

5729146

verified ·

1 Parent(s): a74ebbe

Update appStore/rag.py

Browse files

Files changed (1) hide show

appStore/rag.py +176 -40

appStore/rag.py CHANGED Viewed

@@ -1,3 +1,104 @@
 import os
 # import json
 import numpy as np
@@ -6,12 +107,12 @@ import openai
 from haystack.schema import Document
 import streamlit as st
 from tenacity import retry, stop_after_attempt, wait_random_exponential
 # Get openai API key
-# openai.api_key = os.environ["OPENAI_API_KEY"]
-#model_select = "gpt-3.5-turbo-0125"
-model_select ="gpt-4"
 # define a special function for putting the prompt together (as we can't use haystack)
 def get_prompt(context, label):
@@ -29,59 +130,91 @@ def get_prompt(context, label):
   return prompt
-# def get_prompt(context, label):
-#   base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
-#   Summarize only elements of the context that address vulnerability to climate change. \
-#   Formatting example: \
-#     - Bullet point 1 \
-#     - Bullet point 2 \
-# "
-#   # Add the meta data for references
-#   # context = ' - '.join([d.content for d in docs])
-#   prompt = base_prompt+"; Context: "+context+"; Answer:"
-#   return prompt
-#   base_prompt="Summarize the following context efficiently in bullet points, the less the better- but keep concrete goals. \
-#   Summarize only activities that address the vulnerability of "+label+" to climate change. \
-#   Formatting example: \
-#     - Collect and utilize gender-disaggregated data to inform and improve climate change adaptation efforts. \
-#     - Prioritize gender sensitivity in adaptation options, ensuring participation and benefits for women, who are more vulnerable to climate impacts. \
-# "
-# # convert df rows to Document object so we can feed it into the summarizer easily
-# def get_document(df):
-#     # we take a list of each extract
-#     ls_dict = []
-#     for index, row in df.iterrows():
-#         # Create a Document object for each row (we only need the text)
-#         doc = Document(
-#             row['text'],
-#             meta={
-#             'label': row['Vulnerability Label']}
-#         )
-#         # Append the Document object to the documents list
-#         ls_dict.append(doc)
-#     return ls_dict
-# exception handling for issuing multiple API calls to openai (exponential backoff)
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-def completion_with_backoff(**kwargs):
-    return openai.ChatCompletion.create(**kwargs)
 # construct RAG query, send to openai and process response
 def run_query(context, label):
     '''
     For non-streamed completion, enable the following 2 lines and comment out the code below
     '''
     # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
     # result = res.choices[0].message.content
     # instantiate ChatCompletion as a generator object (stream is set to True)
-    response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(context, label)}], stream=True)
     # iterate through the streamed output
     report = []
     res_box = st.empty()
@@ -102,3 +235,6 @@ def run_query(context, label):

+# import os
+# # import json
+# import numpy as np
+# import pandas as pd
+# import openai
+# from haystack.schema import Document
+# import streamlit as st
+# from tenacity import retry, stop_after_attempt, wait_random_exponential
+# # Get openai API key
+# # openai.api_key = os.environ["OPENAI_API_KEY"]
+# hf_token = os.environ["HF_API_KEY"]
+# #model_select = "gpt-3.5-turbo-0125"
+# model_select ="gpt-4"
+# # define a special function for putting the prompt together (as we can't use haystack)
+# def get_prompt(context, label):
+#   base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
+#   Summarize only elements of the context that address vulnerability of "+label+" to climate change. \
+#   If there is no mention of "+label+" in the context, return nothing. \
+#   Formatting example: \
+#     - Bullet point 1 \
+#     - Bullet point 2 \
+# "
+#   # Add the meta data for references
+#   # context = ' - '.join([d.content for d in docs])
+#   prompt = base_prompt+"; Context: "+context+"; Answer:"
+#   return prompt
+# # def get_prompt(context, label):
+# #   base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
+# #   Summarize only elements of the context that address vulnerability to climate change. \
+# #   Formatting example: \
+# #     - Bullet point 1 \
+# #     - Bullet point 2 \
+# # "
+# #   # Add the meta data for references
+# #   # context = ' - '.join([d.content for d in docs])
+# #   prompt = base_prompt+"; Context: "+context+"; Answer:"
+# #   return prompt
+# #   base_prompt="Summarize the following context efficiently in bullet points, the less the better- but keep concrete goals. \
+# #   Summarize only activities that address the vulnerability of "+label+" to climate change. \
+# #   Formatting example: \
+# #     - Collect and utilize gender-disaggregated data to inform and improve climate change adaptation efforts. \
+# #     - Prioritize gender sensitivity in adaptation options, ensuring participation and benefits for women, who are more vulnerable to climate impacts. \
+# # "
+# # # convert df rows to Document object so we can feed it into the summarizer easily
+# # def get_document(df):
+# #     # we take a list of each extract
+# #     ls_dict = []
+# #     for index, row in df.iterrows():
+# #         # Create a Document object for each row (we only need the text)
+# #         doc = Document(
+# #             row['text'],
+# #             meta={
+# #             'label': row['Vulnerability Label']}
+# #         )
+# #         # Append the Document object to the documents list
+# #         ls_dict.append(doc)
+# #     return ls_dict
+# # exception handling for issuing multiple API calls to openai (exponential backoff)
+# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+# def completion_with_backoff(**kwargs):
+#     return openai.ChatCompletion.create(**kwargs)
+# # construct RAG query, send to openai and process response
+# def run_query(context, label):
+#     '''
+#     For non-streamed completion, enable the following 2 lines and comment out the code below
+#     '''
+#     # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
+#     # result = res.choices[0].message.content
+#     # instantiate ChatCompletion as a generator object (stream is set to True)
+#     response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(context, label)}], stream=True)
+#     # iterate through the streamed output
+#     report = []
+#     res_box = st.empty()
+#     for chunk in response:
+#         # extract the object containing the text (totally different structure when streaming)
+#         chunk_message = chunk['choices'][0]['delta']
+#         # test to make sure there is text in the object (some don't have)
+#         if 'content' in chunk_message:
+#             report.append(chunk_message.content) # extract the message
+#             # add the latest text and merge it with all previous
+#             result = "".join(report).strip()
+#             # res_box.success(result) # output to response text box
+#             res_box.success(result)
 import os
 # import json
 import numpy as np
 from haystack.schema import Document
 import streamlit as st
 from tenacity import retry, stop_after_attempt, wait_random_exponential
+from huggingface_hub import InferenceClient
 # Get openai API key
+openai.api_key = os.environ["OPENAI_API_KEY"]
 # define a special function for putting the prompt together (as we can't use haystack)
 def get_prompt(context, label):
   return prompt
+# # exception handling for issuing multiple API calls to openai (exponential backoff)
+# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+# def completion_with_backoff(**kwargs):
+#     return openai.ChatCompletion.create(**kwargs)
+def get_prompt(context, label):  # build the LLM prompt: summarization instructions mentioning `label`, then the context, then an "Answer:" cue
+  base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
+  Summarize only elements of the context that address vulnerability of "+label+" to climate change. \
+  If there is no mention of "+label+" in the context, return nothing. \
+  Do not include an introduction sentence, just the bullet points as per below. \
+  Formatting example: \
+    - Bullet point 1 \
+    - Bullet point 2 \
+"
+  # NOTE(review): no reference metadata is added here; `context` is used exactly as passed in (it must be a str, since it is concatenated below). The commented-out join is an earlier docs-based variant.
+  # context = ' - '.join([d.content for d in docs])
+  prompt = base_prompt+"; Context: "+context+"; Answer:"  # final layout: <instructions>; Context: <context>; Answer:
+  return prompt
+# # construct RAG query, send to openai and process response
+# def run_query(context, label, chatbot_role):
+#     '''
+#     For non-streamed completion, enable the following 2 lines and comment out the code below
+#     '''
+#     # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
+#     # result = res.choices[0].message.content
+#     messages = [
+#       ChatMessage(role="system", content=chatbot_role),
+#       ChatMessage(role="user", content=get_prompt(context, label)),
+#     ]
+#     response = llm.chat(messages)
+#     return(response)
+# tokenizer = AutoTokenizer.from_pretrained(
+#     "meta-llama/Meta-Llama-3.1-8B-Instruct",
+#     token=hf_token,
+# )
+# stopping_ids = [
+#     tokenizer.eos_token_id,
+#     tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+# ]
+# Define the role of the chatbot
+# chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""
 # construct RAG query, send it to the hosted LLM (Hugging Face InferenceClient) and process the streamed response
 def run_query(context, label):
     '''
     For non-streamed completion, enable the following 2 lines and comment out the code below
     '''
+    chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""
     # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
     # result = res.choices[0].message.content
+    # Initialize the client, pointing it to one of the available models
+    client = InferenceClient()
+    response = client.chat.completions.create(
+        model="meta-llama/Meta-Llama-3.1-8B-Instruct",
+        messages=[
+          ChatMessage(role="system", content=chatbot_role),
+          ChatMessage(role="user", content=get_prompt(context, label)),
+        ],
+        stream=True,
+        max_tokens=500
+    )
+    # iterate and print stream
+    for message in chat_completion:
+        print(message.choices[0].delta.content, end="")
     # instantiate ChatCompletion as a generator object (stream is set to True)
+    # response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(context, label)}], stream=True)
     # iterate through the streamed output
     report = []
     res_box = st.empty()