Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,36 +10,44 @@ logger = logging.getLogger(__name__)
|
|
10 |
# Read the OpenAI API key from the environment (for example a Space secret
# named OPENAI_API_KEY) rather than committing it to the repository.
# NOTE(review): the key that was hard-coded here has been published and
# must be revoked and rotated.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
|
12 |
|
13 |
-
# Load
|
14 |
-
|
15 |
-
|
|
|
|
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
logging.basicConfig(level=logging.INFO)
|
24 |
-
logger = logging.getLogger(__name__)
|
25 |
-
|
26 |
-
# Read the OpenAI API key from the environment (for example a Space secret
# named OPENAI_API_KEY) rather than committing it to the repository.
# NOTE(review): the key that was hard-coded here has been published and
# must be revoked and rotated.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
|
28 |
-
|
29 |
-
# Load just one dataset to start
|
30 |
-
dataset = load_dataset("rungalileo/ragbench", "hotpotqa", split='train')
|
31 |
-
logger.info("Dataset loaded successfully")
|
32 |
|
33 |
-
def process_query(query):
|
34 |
try:
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
response = openai.chat.completions.create(
|
39 |
model="gpt-3.5-turbo",
|
40 |
messages=[
|
41 |
-
{"role": "system", "content": "You are a
|
42 |
-
{"role": "user", "content": f"Context: {
|
43 |
],
|
44 |
max_tokens=300,
|
45 |
temperature=0.7,
|
@@ -48,21 +56,28 @@ def process_query(query):
|
|
48 |
return response.choices[0].message.content.strip()
|
49 |
|
50 |
except Exception as e:
|
51 |
-
return f"
|
52 |
|
53 |
-
#
|
54 |
demo = gr.Interface(
|
55 |
fn=process_query,
|
56 |
-
inputs=
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
examples=[
|
61 |
-
["What role does T-cell count play in severe human adenovirus type 55 (HAdV-55) infection?"],
|
62 |
-
["In what school district is Governor John R. Rogers High School located?"],
|
|
|
63 |
]
|
64 |
)
|
65 |
|
66 |
if __name__ == "__main__":
|
67 |
demo.launch(debug=True)
|
68 |
-
|
|
|
10 |
# Read the OpenAI API key from the environment (for example a Space secret
# named OPENAI_API_KEY) rather than committing it to the repository.
# NOTE(review): the key that was hard-coded here has been published and
# must be revoked and rotated.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
|
12 |
|
13 |
+
# Load the 'train' split of every RagBench subset, keyed by subset name.
# A subset that fails to load is logged and left out of `datasets`; the loop
# then continues with the next subset.
dataset_names = [
    'covidqa', 'cuad', 'delucionqa', 'emanual',
    'expertqa', 'finqa', 'hagrid', 'hotpotqa',
    'msmarco', 'pubmedqa', 'tatqa', 'techqa',
]
datasets = {}

for name in dataset_names:
    try:
        loaded_split = load_dataset("rungalileo/ragbench", name, split='train')
    except Exception as e:
        logger.info(f"Skipping {name}: {str(e)}")
    else:
        datasets[name] = loaded_split
        logger.info(f"Successfully loaded {name}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
+
def _iter_passages(entry):
    """Yield every text passage stored in one ``documents`` cell.

    Handles both a plain string and a sequence of strings, so a row that
    carries several passages is searched passage by passage.
    """
    if isinstance(entry, str):
        yield entry
    elif entry is not None:
        for passage in entry:
            if isinstance(passage, str):
                yield passage


def _find_context(keywords, dataset_names_to_search):
    """Return ``(passage, dataset_name)`` for the first passage containing any
    of the lower-cased ``keywords``, or ``None`` when nothing matches."""
    for dataset_name in dataset_names_to_search:
        if dataset_name not in datasets:
            continue
        for entry in datasets[dataset_name]['documents']:
            for passage in _iter_passages(entry):
                passage_lower = passage.lower()
                if any(keyword in passage_lower for keyword in keywords):
                    return passage, dataset_name
    return None


def process_query(query, dataset_choice="all"):
    """Answer ``query`` with gpt-3.5-turbo, using a keyword-matched passage as context.

    Args:
        query: The user's question.
        dataset_choice: Name of the dataset to search, or ``"all"`` (the
            default) to search every loaded dataset in load order.

    Returns:
        The model's answer with surrounding whitespace stripped, or a short
        error message when the lookup or the API call fails.
    """
    try:
        # Lower-case the keywords once instead of once per passage.
        keywords = [word.lower() for word in query.split()]

        if dataset_choice != "all":
            search_datasets = [dataset_choice]
        else:
            search_datasets = list(datasets)

        # Only the first match is ever used, so the search stops as soon as
        # one is found instead of collecting every matching passage.
        match = _find_context(keywords, search_datasets)
        if match is not None:
            context, source = match
            context_info = f"From {source}: {context}"
        else:
            context_info = "Searching across all available datasets..."

        # NOTE(review): if the original call passed further arguments after
        # `temperature`, restore them here.
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a knowledgeable expert. Provide direct, informative answers based on the available data."},
                {"role": "user", "content": f"Context: {context_info}\nQuestion: {query}"},
            ],
            max_tokens=300,
            temperature=0.7,
        )

        # The message content can be None; treat that as an empty answer.
        answer = response.choices[0].message.content
        return (answer or "").strip()

    except Exception:
        # Record the real failure instead of hiding it behind a status message.
        logger.exception("Failed to answer query %r", query)
        return "Sorry, an error occurred while answering your question. Please try again later."
|
60 |
|
61 |
+
# Gradio UI: a question box and a dataset selector feed process_query, and
# the answer is shown in a single text box.
question_input = gr.Textbox(label="Question", placeholder="Ask any question...")
dataset_input = gr.Dropdown(
    choices=["all", *dataset_names],
    label="Select Dataset",
    value="all",
)
response_output = gr.Textbox(label="Expert Response")

# Each example row is [question, dataset], matching the order of the inputs.
example_queries = [
    ["What role does T-cell count play in severe human adenovirus type 55 (HAdV-55) infection?", "covidqa"],
    ["In what school district is Governor John R. Rogers High School located?", "hotpotqa"],
    ["What are the key financial metrics for Q3?", "finqa"],
]

demo = gr.Interface(
    fn=process_query,
    inputs=[question_input, dataset_input],
    outputs=response_output,
    title="Multi-Dataset Knowledge Base",
    description="Search across all RagBench datasets for comprehensive information",
    examples=example_queries,
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)
|
|