import os
import numpy as np
import pandas as pd
import openai
from haystack.schema import Document
import streamlit as st
from tenacity import retry, stop_after_attempt, wait_random_exponential
from huggingface_hub import InferenceClient
# Get the Hugging Face API token from the environment
hf_token = os.environ["HF_API_KEY"]
# define a dedicated function for putting the prompt together (as we can't use haystack here)
def get_prompt(context, label):
    base_prompt = (
        "Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. "
        f"Summarize only elements of the context that address vulnerability of {label} to climate change. "
        f"If there is no mention of {label} in the context, return: 'No clear references to vulnerability of {label} found'. "
        "Do not include an introduction sentence, just the bullet points as per below. "
        "Formatting example: "
        "- Bullet point 1 "
        "- Bullet point 2 "
    )
    prompt = base_prompt + "; Context: " + context + "; Answer:"
    return prompt
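# Example (hypothetical inputs): get_prompt("Recurring floods displace rural households ...", "Women")
# returns the instruction block above followed by
# "; Context: Recurring floods displace rural households ...; Answer:"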
# # exception handling for issuing multiple API calls to openai (exponential backoff)
# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
# def completion_with_backoff(**kwargs):
#     return openai.ChatCompletion.create(**kwargs)
class ChatCompletionResult:
    """Accumulates streamed text chunks and returns the full completion."""
    def __init__(self):
        self.content = ""

    def add_content(self, text):
        self.content += text

    def get_full_content(self):
        return self.content.strip()
def run_query(context, label, model_sel_name):
    '''
    Summarize the provided text with respect to the vulnerability of the given group.
    '''
    chatbot_role = """You are an analyst specializing in climate change impact assessments and producing insights from policy documents."""
    ### new template that doesn't need a 'system' role [further investigation needed on whether it's applicable to all models]
    messages = [{"role": "user", "content": chatbot_role + get_prompt(context, label)}]
    ## old template
    # messages = [{"role": "system", "content": chatbot_role}, {"role": "user", "content": get_prompt(context, label)}]
    # Initialize the client, pointing it to the selected model
    client = InferenceClient(model_sel_name, token=hf_token)
    # # Instantiate ChatCompletion as a generator object (stream is set to True)
    # chat_completion = client.chat.completions.create(
    #     messages=messages,
    #     stream=True
    # )
    # # Create an object to store the full chat completion
    # completion_result = ChatCompletionResult()
    # res_box = st.empty()
    # # Iterate through the streamed output
    # for chunk in chat_completion:
    #     # Extract the object containing the text
    #     if chunk.choices is not None:
    #         chunk_message = chunk.choices[0].delta
    #         if 'content' in chunk_message:
    #             completion_result.add_content(chunk_message['content'])  # Store the message
    #             # Add the latest text and merge it with all previous
    #             result = completion_result.get_full_content()
    #             res_box.success(result)  # Output to response text box
    # Use streaming text generation
    prompt = messages[0]["content"]  # flatten the single user message into a plain prompt string
    response_stream = client.text_generation(prompt, stream=True, max_new_tokens=512)
    completion_result = ChatCompletionResult()
    res_box = st.empty()
    for chunk in response_stream:
        completion_result.add_content(chunk)  # store the streamed token text
        result = completion_result.get_full_content()
        res_box.success(result)  # output the merged text to the response box as it streams
    # Return the stored completion object for later use
    return completion_result