Vendor-contract-extractor

Running

App Files Files Community

Vendor-contract-extractor / app.py

Jyothikamalesh

Update app.py

2efa6f5 verified 5 months ago

raw

history blame

3.45 kB

	import gradio as gr
	from openai import OpenAI, APIError
	import os
	import tenacity
	import asyncio

	# Hugging Face access token, read from the HF_TOKEN environment variable
	# (None when the variable is not set).
	ACCESS_TOKEN = os.environ.get("HF_TOKEN")

	# OpenAI-compatible client pointed at the Hugging Face Inference API.
	client = OpenAI(api_key=ACCESS_TOKEN, base_url="https://api-inference.huggingface.co/v1/")

	def _is_retryable_api_error(exc):
	    """Return True when *exc* is an API failure worth retrying."""
	    if not isinstance(exc, APIError):
	        return False
	    status = getattr(exc, "status_code", None)
	    # No status code: the request failed before/without an HTTP response
	    # (connection, timeout, mid-stream error). 429: rate limited. 5xx: server side.
	    return status is None or status == 429 or status >= 500


	# Retry logic with tenacity for handling API rate limits. The decorator sits on
	# the raw API call so that exceptions actually reach it; `reraise=True` hands
	# the last APIError back to the caller once the attempts are exhausted.
	@tenacity.retry(
	    retry=tenacity.retry_if_exception(_is_retryable_api_error),
	    wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
	    stop=tenacity.stop_after_attempt(5),
	    reraise=True,
	)
	def _stream_completion(messages, max_tokens, temperature, top_p):
	    """Run one streamed chat completion and return the concatenated text."""
	    stream = client.chat.completions.create(
	        model="NousResearch/Hermes-3-Llama-3.1-8B",
	        max_tokens=max_tokens,
	        stream=True,
	        temperature=temperature,
	        top_p=top_p,
	        messages=messages,
	    )

	    pieces = []
	    for chunk in stream:
	        # Some chunks carry no choices at all; skip them instead of indexing.
	        if not chunk.choices:
	            continue
	        token = chunk.choices[0].delta.content
	        # `content` is None on role-only and final chunks.
	        if token:
	            pieces.append(token)
	    return "".join(pieces)


	async def respond(
	    message,
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """Generate a single-turn reply for *message*.

	    Only the system message and the current user message are sent; no chat
	    history is kept.

	    Args:
	        message: The user's prompt.
	        system_message: The system prompt sent ahead of the user message.
	        max_tokens: Upper bound on generated tokens.
	        temperature: Sampling temperature.
	        top_p: Nucleus-sampling probability mass.

	    Returns:
	        The generated text, or a human-readable error string if the request
	        failed (errors are reported in-band rather than raised).
	    """
	    messages = [
	        {"role": "system", "content": system_message},
	        {"role": "user", "content": message},
	    ]

	    try:
	        # The OpenAI client here is synchronous; run it in a worker thread so
	        # the event loop is not blocked while tokens stream in or while
	        # tenacity sleeps between attempts.
	        loop = asyncio.get_running_loop()
	        return await loop.run_in_executor(
	            None, _stream_completion, messages, max_tokens, temperature, top_p
	        )

	    except APIError as e:
	        # Handle both string and dict types of error bodies
	        error_details = e.body
	        if isinstance(error_details, dict):
	            error_type = error_details.get("type", "Unknown")
	            error_code = error_details.get("code", "Unknown")
	            error_param = error_details.get("param", "Unknown")
	            error_message = error_details.get("message", "An error occurred.")
	            error_str = f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
	        else:
	            error_str = f"Error: {error_details}"

	        print(f"APIError: {error_str}")
	        return error_str

	    except Exception as e:
	        print(f"Exception: {e}")
	        return "Error occurred. Please try again."


	# Gradio click handler: stateless, single-turn generation (no chat history).
	async def generate_response(message, system_message, max_tokens, temperature, top_p):
	    """Await `respond` with the UI values and return its result unchanged."""
	    return await respond(message, system_message, max_tokens, temperature, top_p)


	def launch_app():
	    """Build the single-turn chat UI and start the Gradio server.

	    A KeyError raised while building or launching the interface is printed
	    to stdout instead of propagating.
	    """
	    try:
	        with gr.Blocks() as demo:
	            gr.Markdown("# Chatbot")

	            # Input widgets, in the order they appear on the page.
	            message_box = gr.Textbox(label="Message")
	            system_box = gr.Textbox(label="System message")
	            max_tokens_slider = gr.Slider(
	                minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"
	            )
	            temperature_slider = gr.Slider(
	                minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
	            )
	            top_p_slider = gr.Slider(
	                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"
	            )
	            output_box = gr.Text(label="Response")

	            # Wire the button to the async handler; no history is passed.
	            generate_button = gr.Button("Generate Response")
	            generate_button.click(
	                fn=generate_response,
	                inputs=[
	                    message_box,
	                    system_box,
	                    max_tokens_slider,
	                    temperature_slider,
	                    top_p_slider,
	                ],
	                outputs=[output_box],
	                show_progress=False,
	            )

	        demo.launch(show_error=True)
	    except KeyError as err:
	        print(f"Error: {err}")
	        print("Please try again.")

	# Script entry point: start the UI only when this file is executed directly.
	if __name__ == "__main__":
	    launch_app()