Spaces:

ajalisatgi
/

Gradio

Sleeping

App Files Files Community

Gradio / app.py

ajalisatgi

Update app.py

e4cfa32 verified 5 months ago

raw

history blame

3.96 kB

	import logging
	import os
	import time

	import gradio as gr
	import nltk
	import openai
	import torch
	from datasets import load_dataset
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import Chroma
	from nltk.tokenize import sent_tokenize

	# Set up logging
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Read the OpenAI API key from the environment instead of embedding it in
	# the source file. On Hugging Face Spaces, define OPENAI_API_KEY as a Space
	# secret. Any key that was ever committed to this file remains visible in
	# the repository history and must be revoked.
	openai.api_key = os.environ.get("OPENAI_API_KEY")
	if not openai.api_key:
	    # Warn instead of raising so the UI can still start; requests to OpenAI
	    # will fail and be reported per query until the key is configured.
	    logger.warning(
	        "OPENAI_API_KEY is not set; OpenAI requests will fail until it is configured."
	    )

	# Download NLTK data
	nltk.download('punkt')

	# Initialize models and configurations
	model_name = 'intfloat/e5-small'
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	embedding_model = HuggingFaceEmbeddings(model_name=model_name)
	# Move the underlying embedding model to the GPU when one is available.
	embedding_model.client.to(device)

	# Initialize Chroma with existing database
	vectordb = Chroma(
	    persist_directory='./docs/chroma/',
	    embedding_function=embedding_model,
	)

	def process_query(query):
	    """Answer a question from retrieved documents and report answer metrics.

	    Runs a similarity search for the 30 closest documents in ``vectordb``,
	    sends their concatenated text plus the question to GPT-4, and then calls
	    ``extract_metrics`` on the generated answer.

	    Args:
	        query: The question text entered by the user.

	    Returns:
	        A ``(answer, metrics)`` tuple of strings. A blank query returns a
	        prompt to enter a question; any failure returns ``"Error: <message>"``
	        with ``"Metrics unavailable"``.
	    """
	    # Reject blank input up front: it would otherwise trigger a vector search
	    # and two model calls for a question that cannot be answered.
	    if not query or not query.strip():
	        return "Please enter a question.", "Metrics unavailable"

	    try:
	        logger.info("Processing query: %s", query)

	        # Get relevant documents
	        relevant_docs = vectordb.similarity_search(query, k=30)
	        context = " ".join(doc.page_content for doc in relevant_docs)

	        # Add delay to respect API rate limits
	        time.sleep(1)

	        # Generate response using OpenAI
	        response = openai.chat.completions.create(
	            model="gpt-4",
	            messages=[
	                {"role": "system", "content": "You are a helpful assistant."},
	                {"role": "user", "content": f"Given the document: {context}\n\nGenerate a response to the query: {query}"},
	            ],
	            max_tokens=300,
	            temperature=0.7,
	        )

	        answer = response.choices[0].message.content.strip()
	        logger.info("Successfully generated response")

	        # Extract and display metrics
	        metrics = extract_metrics(query, answer, relevant_docs)

	        return answer, metrics

	    except Exception as exc:
	        # This is the UI boundary: log the full traceback for operators and
	        # show the user a short message instead of crashing the app.
	        logger.exception("Error processing query: %s", exc)
	        return f"Error: {exc}", "Metrics unavailable"

	def extract_metrics(query, response, relevant_docs):
	    """Ask GPT-4 to assess a generated answer against its retrieved context.

	    Builds a prompt from the question, the concatenated ``page_content`` of
	    ``relevant_docs`` and the answer, and requests metrics for context
	    relevance, context utilization, completeness and response quality.

	    Args:
	        query: The user's question.
	        response: The answer generated for ``query``.
	        relevant_docs: Iterable of documents exposing ``page_content``.

	    Returns:
	        The model's metrics text with surrounding whitespace stripped, or
	        ``"Metrics calculation failed"`` if anything goes wrong.
	    """
	    try:
	        context = " ".join(doc.page_content for doc in relevant_docs)
	        metrics_prompt = f"""
	Question: {query}
	Context: {context}
	Response: {response}

	Extract metrics for:
	- Context Relevance
	- Context Utilization
	- Completeness
	- Response Quality
	"""

	        metrics_response = openai.chat.completions.create(
	            model="gpt-4",
	            messages=[{"role": "user", "content": metrics_prompt}],
	            max_tokens=150,
	            temperature=0.7,
	        )

	        return metrics_response.choices[0].message.content.strip()
	    except Exception:
	        # Metrics are best-effort, so keep the fallback string, but record the
	        # failure with its traceback instead of discarding it silently.
	        logging.getLogger(__name__).exception("Metrics calculation failed")
	        return "Metrics calculation failed"

	# Build the Gradio UI: one question box in, answer and metrics boxes out.
	_question_input = gr.Textbox(
	    label="Enter your question",
	    placeholder="Type your question here...",
	    lines=2,
	)
	_result_outputs = [
	    gr.Textbox(label="Answer", lines=5),
	    gr.Textbox(label="Metrics", lines=4),
	]
	# Sample questions offered as one-click examples in the interface.
	_example_questions = [
	    ["What role does T-cell count play in severe human adenovirus type 55 (HAdV-55) infection?"],
	    ["In what school district is Governor John R. Rogers High School located?"],
	    ["Is there a functional neural correlate of individual differences in cardiovascular reactivity?"],
	    ["How do I select Natural mode?"],
	]

	demo = gr.Interface(
	    fn=process_query,
	    inputs=[_question_input],
	    outputs=_result_outputs,
	    title="RAG-Powered Question Answering System",
	    description="Ask questions and get answers based on the embedded document knowledge.",
	    examples=_example_questions,
	)

	# Start the app with debug output only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch(debug=True)