Gradio / app.py
ajalisatgi's picture
Update app.py
a48a101 verified
raw
history blame
3.19 kB
import logging
import os

import gradio as gr
import openai
from datasets import load_dataset
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize the OpenAI API key from the environment. Credentials must never
# be committed to source control.
# NOTE: the key that was previously hard-coded on this line has been published
# and must be treated as compromised -- revoke and rotate it.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    # Do not abort start-up: the UI can still come up, and the missing
    # credential is reported here instead of surfacing only on first request.
    logger.warning("OPENAI_API_KEY is not set; OpenAI requests will fail.")
# Load all RagBench datasets.
# `datasets` maps subset name -> loaded 'train' split; subsets that fail to
# load are left out so the app can still start with whatever is available.
datasets = {}
dataset_names = [
    'covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa',
    'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa',
    'tatqa', 'techqa',
]
for name in dataset_names:
    try:
        datasets[name] = load_dataset("rungalileo/ragbench", name, split='train')
    except Exception as e:
        # Best effort by design: one broken subset must not stop the others
        # from loading. Logged at WARNING so that the failure stands out from
        # the routine start-up messages.
        logger.warning("Skipping %s: %s", name, e)
    else:
        logger.info("Successfully loaded %s", name)
def _iter_passages(documents_entry):
    """Yield each text passage contained in one ``documents`` cell."""
    # NOTE(review): RagBench rows appear to store `documents` as a list of
    # passages rather than a single string -- confirm against the dataset
    # card. Both shapes are accepted here; non-string items are ignored.
    if isinstance(documents_entry, str):
        yield documents_entry
    elif documents_entry is not None:
        for passage in documents_entry:
            if isinstance(passage, str):
                yield passage


def _find_first_context(query, dataset_choice):
    """Return ``(passage, dataset_name)`` for the first keyword hit, else ``None``."""
    # Lower-case the keywords once instead of once per document.
    keywords = [word.lower() for word in query.split()]
    if not keywords:
        return None

    # Search through the selected dataset, or all loaded datasets.
    search_datasets = [dataset_choice] if dataset_choice != "all" else list(datasets)
    for dataset_name in search_datasets:
        if dataset_name not in datasets:
            # The subset failed to load at start-up (or the name is unknown).
            continue
        for entry in datasets[dataset_name]['documents']:
            for passage in _iter_passages(entry):
                lowered = passage.lower()
                if any(keyword in lowered for keyword in keywords):
                    # Only the first match is ever used, so stop scanning here.
                    return passage, dataset_name
    return None


def process_query(query, dataset_choice="all"):
    """Answer ``query`` with gpt-3.5-turbo, grounded on a keyword-matched passage.

    Args:
        query: The user's question. It is split on whitespace and every word
            is used as a case-insensitive substring keyword.
        dataset_choice: Name of the loaded dataset to search, or ``"all"`` to
            search every loaded dataset in load order.

    Returns:
        The stripped text of the model's reply. If anything fails (search or
        API call), the error is logged and a placeholder message is returned
        instead of raising, so the UI always receives a string.
    """
    try:
        match = _find_first_context(query, dataset_choice)

        # Use the most relevant (first matching) context
        if match is not None:
            context, source = match
            context_info = f"From {source}: {context}"
        else:
            context_info = "Searching across all available datasets..."

        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a knowledgeable expert. Provide direct, informative answers based on the available data."},
                {"role": "user", "content": f"Context: {context_info}\nQuestion: {query}"}
            ],
            max_tokens=300,
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()
    except Exception:
        # UI boundary: keep the interface responsive, but record the real
        # failure (with traceback) instead of silently discarding it.
        logger.exception("Failed to answer query %r", query)
        return f"Currently searching through all available datasets for information about {query}."
def _build_demo():
    """Assemble the Gradio interface that fronts ``process_query``.

    The form has a free-text question box and a dataset dropdown ("all" plus
    every name in ``dataset_names``); the answer is shown in a single textbox.
    """
    question_box = gr.Textbox(label="Question", placeholder="Ask any question...")
    dataset_picker = gr.Dropdown(
        choices=["all"] + dataset_names,
        label="Select Dataset",
        value="all",
    )
    answer_box = gr.Textbox(label="Expert Response")

    # Each example row is [question, dataset], matching the input order.
    sample_queries = [
        ["What role does T-cell count play in severe human adenovirus type 55 (HAdV-55) infection?", "covidqa"],
        ["In what school district is Governor John R. Rogers High School located?", "hotpotqa"],
        ["What are the key financial metrics for Q3?", "finqa"],
    ]

    return gr.Interface(
        fn=process_query,
        inputs=[question_box, dataset_picker],
        outputs=answer_box,
        title="Multi-Dataset Knowledge Base",
        description="Search across all RagBench datasets for comprehensive information",
        examples=sample_queries,
    )


# Gradio interface with dataset selection, built once at import time.
demo = _build_demo()

if __name__ == "__main__":
    demo.launch(debug=True)