import torch
import gradio as gr
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
import openai
import os
import time
import logging
from datasets import load_dataset
from nltk.tokenize import sent_tokenize
import nltk
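# Note: load_dataset and sent_tokenize are not referenced in this file; they are
# presumably leftovers from an earlier corpus-preparation step (an assumption,
# not confirmed by this script).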
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize OpenAI API key
openai.api_key = os.environ.get("OPENAI_API_KEY")  # Set OPENAI_API_KEY in your environment; never hard-code real keys
# Download NLTK data
nltk.download('punkt')
# Initialize models and configurations
model_name = 'intfloat/e5-small'
device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={"device": device},  # load the sentence-transformer directly on the chosen device
)
# Initialize Chroma with existing database
vectordb = Chroma(
    persist_directory='./docs/chroma/',
    embedding_function=embedding_model
)
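# The persisted store above is assumed to have been built in a separate indexing
# step. A minimal sketch of such a step (illustrative only; `raw_documents` and the
# splitter settings are assumptions, not taken from this app) could be:
#
#   from langchain.text_splitter import RecursiveCharacterTextSplitter
#   splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
#   splits = splitter.split_documents(raw_documents)
#   Chroma.from_documents(splits, embedding_model, persist_directory='./docs/chroma/')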
def process_query(query):
    try:
        logger.info(f"Processing query: {query}")
        # Get relevant documents
        relevant_docs = vectordb.similarity_search(query, k=30)
        context = " ".join([doc.page_content for doc in relevant_docs])
        # Add delay to respect API rate limits
        time.sleep(1)
        # Generate response using OpenAI
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"Given the document: {context}\n\nGenerate a response to the query: {query}"}
            ],
            max_tokens=300,
            temperature=0.7,
        )
        answer = response.choices[0].message.content.strip()
        logger.info("Successfully generated response")
        # Extract and display metrics
        metrics = extract_metrics(query, answer, relevant_docs)
        return answer, metrics
    except Exception as e:
        logger.error(f"Error processing query: {str(e)}")
        return f"Error: {str(e)}", "Metrics unavailable"
def extract_metrics(query, response, relevant_docs):
    try:
        context = " ".join([doc.page_content for doc in relevant_docs])
        metrics_prompt = f"""
Question: {query}
Context: {context}
Response: {response}

Extract metrics for:
- Context Relevance
- Context Utilization
- Completeness
- Response Quality
"""
        metrics_response = openai.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": metrics_prompt}],
            max_tokens=150,
            temperature=0.7,
        )
        return metrics_response.choices[0].message.content.strip()
    except Exception as e:
        logger.error(f"Error extracting metrics: {str(e)}")
        return "Metrics calculation failed"
# Create Gradio interface
demo = gr.Interface(
    fn=process_query,
    inputs=[
        gr.Textbox(
            label="Enter your question",
            placeholder="Type your question here...",
            lines=2
        )
    ],
    outputs=[
        gr.Textbox(label="Answer", lines=5),
        gr.Textbox(label="Metrics", lines=4)
    ],
    title="RAG-Powered Question Answering System",
    description="Ask questions and get answers based on the embedded document knowledge.",
    examples=[
        ["What role does T-cell count play in severe human adenovirus type 55 (HAdV-55) infection?"],
        ["In what school district is Governor John R. Rogers High School located?"],
        ["Is there a functional neural correlate of individual differences in cardiovascular reactivity?"],
        ["How do I select Natural mode?"]
    ]
)
# Launch with debugging enabled
if __name__ == "__main__":
    demo.launch(debug=True)
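
# To run locally (assumptions: this file is saved as app.py and OPENAI_API_KEY is
# exported in the environment):
#   pip install torch gradio langchain langchain-community chromadb openai sentence-transformers nltk
#   python app.py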