import torch
import gradio as gr
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
import openai
import os
import time
import logging
from datasets import load_dataset
from nltk.tokenize import sent_tokenize
import nltk
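# Note: load_dataset and sent_tokenize are not referenced in this file; they are
# presumably leftovers from an earlier corpus-preparation step (an assumption,
# not confirmed by this script).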
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize OpenAI API key
openai.api_key = os.environ.get("OPENAI_API_KEY")  # Set OPENAI_API_KEY in your environment; never hard-code real keys
# Download NLTK data
nltk.download('punkt')
# Initialize models and configurations
model_name = 'intfloat/e5-small'
device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={"device": device},  # load the sentence-transformer directly on the chosen device
)
# Initialize Chroma with existing database
vectordb = Chroma(
    persist_directory='./docs/chroma/',
    embedding_function=embedding_model
)
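# The persisted store above is assumed to have been built in a separate indexing
# step. A minimal sketch of such a step (illustrative only; `raw_documents` and the
# splitter settings are assumptions, not taken from this app) could be:
#
#   from langchain.text_splitter import RecursiveCharacterTextSplitter
#   splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
#   splits = splitter.split_documents(raw_documents)
#   Chroma.from_documents(splits, embedding_model, persist_directory='./docs/chroma/')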
def process_query(query):
    try:
        logger.info(f"Processing query: {query}")
        # Get relevant documents
        relevant_docs = vectordb.similarity_search(query, k=30)
        context = " ".join([doc.page_content for doc in relevant_docs])
        # Add delay to respect API rate limits
        time.sleep(1)
        # Generate response using OpenAI
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"Given the document: {context}\n\nGenerate a response to the query: {query}"}
            ],
            max_tokens=300,
            temperature=0.7,
        )
        answer = response.choices[0].message.content.strip()
        logger.info("Successfully generated response")
        # Extract and display metrics
        metrics = extract_metrics(query, answer, relevant_docs)
        return answer, metrics
    except Exception as e:
        logger.error(f"Error processing query: {str(e)}")
        return f"Error: {str(e)}", "Metrics unavailable"
def extract_metrics(query, response, relevant_docs):
    try:
        context = " ".join([doc.page_content for doc in relevant_docs])
        metrics_prompt = f"""
Question: {query}
Context: {context}
Response: {response}

Extract metrics for:
- Context Relevance
- Context Utilization
- Completeness
- Response Quality
"""
        metrics_response = openai.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": metrics_prompt}],
            max_tokens=150,
            temperature=0.7,
        )
        return metrics_response.choices[0].message.content.strip()
    except Exception as e:
        logger.error(f"Error extracting metrics: {str(e)}")
        return "Metrics calculation failed"
# Create Gradio interface
demo = gr.Interface(
    fn=process_query,
    inputs=[
        gr.Textbox(
            label="Enter your question",
            placeholder="Type your question here...",
            lines=2
        )
    ],
    outputs=[
        gr.Textbox(label="Answer", lines=5),
        gr.Textbox(label="Metrics", lines=4)
    ],
    title="RAG-Powered Question Answering System",
    description="Ask questions and get answers based on the embedded document knowledge.",
    examples=[
        ["What role does T-cell count play in severe human adenovirus type 55 (HAdV-55) infection?"],
        ["In what school district is Governor John R. Rogers High School located?"],
        ["Is there a functional neural correlate of individual differences in cardiovascular reactivity?"],
        ["How do I select Natural mode?"]
    ]
)
# Launch with debugging enabled
if __name__ == "__main__":
    demo.launch(debug=True)
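
# To run locally (assumptions: this file is saved as app.py and OPENAI_API_KEY is
# exported in the environment):
#   pip install torch gradio langchain langchain-community chromadb openai sentence-transformers nltk
#   python app.py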