# import os
# # import json
# import numpy as np
# import pandas as pd
# import openai
# from haystack.schema import Document
# import streamlit as st
# from tenacity import retry, stop_after_attempt, wait_random_exponential


# # Get openai API key
# # openai.api_key = os.environ["OPENAI_API_KEY"]
# hf_token = os.environ["HF_API_KEY"]
# #model_select = "gpt-3.5-turbo-0125"
# model_select ="gpt-4"

# # define a special function for putting the prompt together (as we can't use haystack)
# def get_prompt(context, label):
#   base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
#   Summarize only elements of the context that address vulnerability of "+label+" to climate change. \
#   If there is no mention of "+label+" in the context, return nothing. \
#   Formatting example: \
#     - Bullet point 1 \
#     - Bullet point 2 \
# "

#   # Add the meta data for references
#   # context = ' - '.join([d.content for d in docs])
#   prompt = base_prompt+"; Context: "+context+"; Answer:"
  
#   return prompt

# # def get_prompt(context, label):
# #   base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \
# #   Summarize only elements of the context that address vulnerability to climate change. \
# #   Formatting example: \
# #     - Bullet point 1 \
# #     - Bullet point 2 \
# # "

# #   # Add the meta data for references
# #   # context = ' - '.join([d.content for d in docs])
# #   prompt = base_prompt+"; Context: "+context+"; Answer:"
  
# #   return prompt

# #   base_prompt="Summarize the following context efficiently in bullet points, the less the better- but keep concrete goals. \
# #   Summarize only activities that address the vulnerability of "+label+" to climate change. \
# #   Formatting example: \
# #     - Collect and utilize gender-disaggregated data to inform and improve climate change adaptation efforts. \
# #     - Prioritize gender sensitivity in adaptation options, ensuring participation and benefits for women, who are more vulnerable to climate impacts. \
# # "
# # # convert df rows to Document object so we can feed it into the summarizer easily
# # def get_document(df):
# #     # we take a list of each extract
# #     ls_dict = []
# #     for index, row in df.iterrows():
# #         # Create a Document object for each row (we only need the text)
# #         doc = Document(
# #             row['text'],
# #             meta={
# #             'label': row['Vulnerability Label']}
# #         )
# #         # Append the Document object to the documents list
# #         ls_dict.append(doc)

# #     return ls_dict 


# # exception handling for issuing multiple API calls to openai (exponential backoff)
# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
# def completion_with_backoff(**kwargs):
#     return openai.ChatCompletion.create(**kwargs)


# # construct RAG query, send to openai and process response
# def run_query(context, label):
#     '''
#     For non-streamed completion, enable the following 2 lines and comment out the code below
#     '''
#     # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
#     # result = res.choices[0].message.content

#     # instantiate ChatCompletion as a generator object (stream is set to True)
#     response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(context, label)}], stream=True)
#     # iterate through the streamed output
#     report = []
#     res_box = st.empty()
#     for chunk in response:
#         # extract the object containing the text (totally different structure when streaming)
#         chunk_message = chunk['choices'][0]['delta']
#         # test to make sure there is text in the object (some don't have)
#         if 'content' in chunk_message:
#             report.append(chunk_message.content) # extract the message
#             # add the latest text and merge it with all previous
#             result = "".join(report).strip()
#             # res_box.success(result) # output to response text box
#             res_box.success(result)



import os
# import json
import numpy as np
import pandas as pd
import openai
from haystack.schema import Document
import streamlit as st
from tenacity import retry, stop_after_attempt, wait_random_exponential
from huggingface_hub import InferenceClient


# Get the Hugging Face API token (used by the InferenceClient below)
hf_token = os.environ["HF_API_KEY"]


# define a special function for putting the prompt together (as we can't use haystack)
def get_prompt(context, label):
    base_prompt = (
        "Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. "
        "Summarize only elements of the context that address vulnerability of " + label + " to climate change. "
        "If there is no mention of " + label + " in the context, return nothing. "
        "Do not include an introduction sentence, just the bullet points as per below. "
        "Formatting example: "
        "- Bullet point 1 "
        "- Bullet point 2"
    )

    # append the retrieved context so the model answers against it
    # (previously: context = ' - '.join([d.content for d in docs]) when Haystack Documents were used)
    prompt = base_prompt + "; Context: " + context + "; Answer:"

    return prompt
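# Illustrative only (hypothetical values): for label "women", get_prompt returns a single
# string roughly of the form
#   "Summarize the following context efficiently in bullet points, ... vulnerability of women
#    to climate change. ...; Context: <retrieved paragraphs>; Answer:"
# which is then sent as the user message in run_query below.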


# # construct RAG query, send to openai and process response
# def run_query(context, label, chatbot_role):
#     '''
#     For non-streamed completion, enable the following 2 lines and comment out the code below
#     '''
#     # res = openai.ChatCompletion.create(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
#     # result = res.choices[0].message.content

#     messages = [
#       ChatMessage(role="system", content=chatbot_role),
#       ChatMessage(role="user", content=get_prompt(context, label)),
#     ]
#     response = llm.chat(messages)
#     return(response)



# tokenizer = AutoTokenizer.from_pretrained(
#     "meta-llama/Meta-Llama-3.1-8B-Instruct",
#     token=hf_token,
# )

# stopping_ids = [
#     tokenizer.eos_token_id,
#     tokenizer.convert_tokens_to_ids("<|eot_id|>"),
# ]

# Define the role of the chatbot
# chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""

# construct the RAG query, send it to the Hugging Face Inference API and stream the response
def run_query(context, label):
    '''
    Build the chat messages for the given context and vulnerability label, stream the
    completion from the hosted Llama 3 model and render the partial summary in Streamlit
    as it arrives. (A non-streamed OpenAI variant is kept in the commented-out block above.)
    '''
    chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""

    messages = [
        {"role": "system", "content": chatbot_role},
        {"role": "user", "content": get_prompt(context, label)},
    ]

    # Initialize the client, pointing it to one of the available hosted models
    client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=hf_token)

    # iterate through the streamed output, accumulating the partial messages
    report = []
    res_box = st.empty()
    for chunk in client.chat_completion(messages, stream=True):
        # each streamed chunk carries a delta object with the newly generated text
        chunk_message = chunk.choices[0].delta
        # some chunks (e.g. the first, which only sets the role) carry no content
        if chunk_message.content:
            report.append(chunk_message.content)  # extract the message
            # merge the latest text with everything received so far
            result = "".join(report).strip()
            res_box.success(result)  # output to the response text box
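

# Example usage from the Streamlit app (hypothetical variable names; the column names follow
# the commented-out get_document helper above): concatenate the retrieved extracts for one
# vulnerability label and stream the summary into the page.
#
#   extracts = df[df['Vulnerability Label'] == 'women']['text']
#   context = ' - '.join(extracts)
#   run_query(context, label='women')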