mtyrrell committed
Commit b798c40 · verified · 1 Parent(s): ceedd37

Update appStore/rag.py

Files changed (1):
  1. appStore/rag.py +3 -177
appStore/rag.py CHANGED
@@ -1,106 +1,4 @@
- # import os
- # # import json
- # import numpy as np
- # import pandas as pd
- # import openai
- # from haystack.schema import Document
- # import streamlit as st
- # from tenacity import retry, stop_after_attempt, wait_random_exponential
-
-
- # # Get openai API key
- # # openai.api_key = os.environ["OPENAI_API_KEY"]
- # hf_token = os.environ["HF_API_KEY"]
- # #model_select = "gpt-3.5-turbo-0125"
- # model_select ="gpt-4"
-
- # # define a special function for putting the prompt together (as we can't use haystack)
- # def get_prompt(context, label):
- # base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
- # Summarize only elements of the context that address vulnerability of "+label+" to climate change. \
- # If there is no mention of "+label+" in the context, return nothing. \
- # Formatting example: \
- # - Bullet point 1 \
- # - Bullet point 2 \
- # "
-
- # # Add the meta data for references
- # # context = ' - '.join([d.content for d in docs])
- # prompt = base_prompt+"; Context: "+context+"; Answer:"
-
- # return prompt
-
- # # def get_prompt(context, label):
- # # base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
- # # Summarize only elements of the context that address vulnerability to climate change. \
- # # Formatting example: \
- # # - Bullet point 1 \
- # # - Bullet point 2 \
- # # "
-
- # # # Add the meta data for references
- # # # context = ' - '.join([d.content for d in docs])
- # # prompt = base_prompt+"; Context: "+context+"; Answer:"
-
- # # return prompt
-
- # # base_prompt="Summarize the following context efficiently in bullet points, the less the better- but keep concrete goals. \
- # # Summarize only activities that address the vulnerability of "+label+" to climate change. \
- # # Formatting example: \
- # # - Collect and utilize gender-disaggregated data to inform and improve climate change adaptation efforts. \
- # # - Prioritize gender sensitivity in adaptation options, ensuring participation and benefits for women, who are more vulnerable to climate impacts. \
- # # "
- # # # convert df rows to Document object so we can feed it into the summarizer easily
- # # def get_document(df):
- # # # we take a list of each extract
- # # ls_dict = []
- # # for index, row in df.iterrows():
- # # # Create a Document object for each row (we only need the text)
- # # doc = Document(
- # # row['text'],
- # # meta={
- # # 'label': row['Vulnerability Label']}
- # # )
- # # # Append the Document object to the documents list
- # # ls_dict.append(doc)
-
- # # return ls_dict
-
-
- # # exception handling for issuing multiple API calls to openai (exponential backoff)
- # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
- # def completion_with_backoff(**kwargs):
- # return openai.ChatCompletion.create(**kwargs)
-
-
- # # construct RAG query, send to openai and process response
- # def run_query(context, label):
- # '''
- # For non-streamed completion, enable the following 2 lines and comment out the code below
- # '''
- # # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
- # # result = res.choices[0].message.content
-
- # # instantiate ChatCompletion as a generator object (stream is set to True)
- # response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(context, label)}], stream=True)
- # # iterate through the streamed output
- # report = []
- # res_box = st.empty()
- # for chunk in response:
- # # extract the object containing the text (totally different structure when streaming)
- # chunk_message = chunk['choices'][0]['delta']
- # # test to make sure there is text in the object (some don't have)
- # if 'content' in chunk_message:
- # report.append(chunk_message.content) # extract the message
- # # add the latest text and merge it with all previous
- # result = "".join(report).strip()
- # # res_box.success(result) # output to response text box
- # res_box.success(result)
-
-
-
  import os
- # import json
  import numpy as np
  import pandas as pd
  import openai
@@ -121,99 +19,27 @@ def get_prompt(context, label):
  If there is no mention of "+label+" in the context, return nothing. \
  Formatting example: \
  - Bullet point 1 \
- - Bullet point 2 \
- "
-
- # Add the meta data for references
- # context = ' - '.join([d.content for d in docs])
+ - Bullet point 2 "
  prompt = base_prompt+"; Context: "+context+"; Answer:"

  return prompt


-
-
  # # exception handling for issuing multiple API calls to openai (exponential backoff)
  # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
  # def completion_with_backoff(**kwargs):
  # return openai.ChatCompletion.create(**kwargs)

-
- def get_prompt(context, label):
- base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
- Summarize only elements of the context that address vulnerability of "+label+" to climate change. \
- If there is no mention of "+label+" in the context, return nothing. \
- Do not include an introduction sentence, just the bullet points as per below. \
- Formatting example: \
- - Bullet point 1 \
- - Bullet point 2 \
- "
-
- # Add the meta data for references
- # context = ' - '.join([d.content for d in docs])
- prompt = base_prompt+"; Context: "+context+"; Answer:"
-
- return prompt
-
-
- # # construct RAG query, send to openai and process response
- # def run_query(context, label, chatbot_role):
- # '''
- # For non-streamed completion, enable the following 2 lines and comment out the code below
- # '''
- # # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
- # # result = res.choices[0].message.content
-
- # messages = [
- # ChatMessage(role="system", content=chatbot_role),
- # ChatMessage(role="user", content=get_prompt(context, label)),
- # ]
- # response = llm.chat(messages)
- # return(response)
-
-
-
- # tokenizer = AutoTokenizer.from_pretrained(
- # "meta-llama/Meta-Llama-3.1-8B-Instruct",
- # token=hf_token,
- # )
-
- # stopping_ids = [
- # tokenizer.eos_token_id,
- # tokenizer.convert_tokens_to_ids("<|eot_id|>"),
- # ]
-
- # Define the role of the chatbot
- # chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""
-
- # construct RAG query, send to openai and process response
+ # construct query, send to HF API and process response
  def run_query(context, label):
  '''
  For non-streamed completion, enable the following 2 lines and comment out the code below
  '''
  chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""
-
  messages = [{"role": "system", "content": chatbot_role},{"role": "user", "content": get_prompt(context, label)}]

- # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
- # result = res.choices[0].message.content
-
  # Initialize the client, pointing it to one of the available models
- client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token = hf_token)
-
- # response = client.chat.completions.create(
- # model="meta-llama/Meta-Llama-3.1-8B-Instruct",
- # messages=[
- # ChatMessage(role="system", content=chatbot_role),
- # ChatMessage(role="user", content=get_prompt(context, label)),
- # ],
- # stream=True,
- # max_tokens=500
- # )
-
- # iterate and print stream
- # for message in chat_completion:
- # print(message.choices[0].delta.content, end="")
+ client = InferenceClient("meta-llama/Meta-Llama-3.1-8B-Instruct", token = hf_token)

  # instantiate ChatCompletion as a generator object (stream is set to True)
  # response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(context, label)}], stream=True)
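After this commit, run_query builds the chat messages and instantiates the Hugging Face InferenceClient, but the call that actually generates and streams the completion is still commented out. Below is a minimal sketch, assembled from the commented-out snippets visible in the diff, of how the streamed response could be requested and rendered in Streamlit. It is not part of the commit: the max_tokens value is taken from a removed comment, get_prompt is the helper defined earlier in rag.py, and the Streamlit rendering details mirror the deleted OpenAI loop.

import os
import streamlit as st
from huggingface_hub import InferenceClient

hf_token = os.environ["HF_API_KEY"]

def run_query(context, label):
    chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""
    # get_prompt() is the prompt builder defined earlier in appStore/rag.py
    messages = [{"role": "system", "content": chatbot_role},
                {"role": "user", "content": get_prompt(context, label)}]

    # Initialize the client, pointing it to one of the available models
    client = InferenceClient("meta-llama/Meta-Llama-3.1-8B-Instruct", token=hf_token)

    # Request a streamed chat completion; with no model argument the client uses the one set above
    response = client.chat.completions.create(
        messages=messages,
        stream=True,
        max_tokens=500,  # assumed limit, taken from the removed commented-out snippet
    )

    # Accumulate the streamed chunks and progressively render them in a Streamlit placeholder
    report = []
    res_box = st.empty()
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta:  # some chunks carry no text
            report.append(delta)
            res_box.success("".join(report).strip())
    return "".join(report).strip()

This mirrors the OpenAI streaming loop the commit deletes, with openai.ChatCompletion swapped for the InferenceClient chat-completion API.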