ajalisatgi committed on
Commit
a48a101
·
verified ·
1 Parent(s): e9e55ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -32
app.py CHANGED
@@ -10,36 +10,44 @@ logger = logging.getLogger(__name__)
10
  # Initialize OpenAI API key
11
  openai.api_key = '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'
12
 
13
- # Load just one dataset to start
14
- dataset = load_dataset("rungalileo/ragbench", "hotpotqa", split='train')
15
- logger.info("Dataset loaded successfully")
 
 
16
 
17
- import gradio as gr
18
- import openai
19
- from datasets import load_dataset
20
- import logging
21
-
22
- # Set up logging
23
- logging.basicConfig(level=logging.INFO)
24
- logger = logging.getLogger(__name__)
25
-
26
- # Initialize OpenAI API key
27
- openai.api_key = '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'
28
-
29
- # Load just one dataset to start
30
- dataset = load_dataset("rungalileo/ragbench", "hotpotqa", split='train')
31
- logger.info("Dataset loaded successfully")
32
 
33
- def process_query(query):
34
  try:
35
- # Get relevant documents
36
- context = dataset['documents'][0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  response = openai.chat.completions.create(
39
  model="gpt-3.5-turbo",
40
  messages=[
41
- {"role": "system", "content": "You are a confident expert assistant. Provide direct, clear answers based on the available information. Focus on what you can determine from the context and suggest exploring related topics when needed. Never apologize - maintain a positive, solution-focused tone."},
42
- {"role": "user", "content": f"Context: {context}\nQuestion: {query}"}
43
  ],
44
  max_tokens=300,
45
  temperature=0.7,
@@ -48,21 +56,28 @@ def process_query(query):
48
  return response.choices[0].message.content.strip()
49
 
50
  except Exception as e:
51
- return f"Let's explore information about {query} from other sections of our database. What specific aspects would you like to know more about?"
52
 
53
- # Create simple Gradio interface
54
  demo = gr.Interface(
55
  fn=process_query,
56
- inputs=gr.Textbox(label="Question"),
57
- outputs=gr.Textbox(label="Answer"),
58
- title="RagBench QA System",
59
- description="Ask questions about HotpotQA dataset",
 
 
 
 
 
 
 
60
  examples=[
61
- ["What role does T-cell count play in severe human adenovirus type 55 (HAdV-55) infection?"],
62
- ["In what school district is Governor John R. Rogers High School located?"],
 
63
  ]
64
  )
65
 
66
  if __name__ == "__main__":
67
  demo.launch(debug=True)
68
-
 
10
  # Initialize OpenAI API key
11
  openai.api_key = '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'
12
 
13
+ # Load all RagBench datasets
14
+ datasets = {}
15
+ dataset_names = ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa',
16
+ 'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa',
17
+ 'tatqa', 'techqa']
18
 
19
+ for name in dataset_names:
20
+ try:
21
+ datasets[name] = load_dataset("rungalileo/ragbench", name, split='train')
22
+ logger.info(f"Successfully loaded {name}")
23
+ except Exception as e:
24
+ logger.info(f"Skipping {name}: {str(e)}")
 
 
 
 
 
 
 
 
 
25
 
26
+ def process_query(query, dataset_choice="all"):
27
  try:
28
+ relevant_contexts = []
29
+
30
+ # Search through selected or all datasets
31
+ search_datasets = [dataset_choice] if dataset_choice != "all" else datasets.keys()
32
+
33
+ for dataset_name in search_datasets:
34
+ if dataset_name in datasets:
35
+ for doc in datasets[dataset_name]['documents']:
36
+ if any(keyword.lower() in doc.lower() for keyword in query.split()):
37
+ relevant_contexts.append((doc, dataset_name))
38
+
39
+ # Use the most relevant context
40
+ if relevant_contexts:
41
+ context, source = relevant_contexts[0]
42
+ context_info = f"From {source}: {context}"
43
+ else:
44
+ context_info = "Searching across all available datasets..."
45
 
46
  response = openai.chat.completions.create(
47
  model="gpt-3.5-turbo",
48
  messages=[
49
+ {"role": "system", "content": "You are a knowledgeable expert. Provide direct, informative answers based on the available data."},
50
+ {"role": "user", "content": f"Context: {context_info}\nQuestion: {query}"}
51
  ],
52
  max_tokens=300,
53
  temperature=0.7,
 
56
  return response.choices[0].message.content.strip()
57
 
58
  except Exception as e:
59
+ return f"Currently searching through all available datasets for information about {query}."
60
 
61
+ # Enhanced Gradio interface with dataset selection
62
  demo = gr.Interface(
63
  fn=process_query,
64
+ inputs=[
65
+ gr.Textbox(label="Question", placeholder="Ask any question..."),
66
+ gr.Dropdown(
67
+ choices=["all"] + dataset_names,
68
+ label="Select Dataset",
69
+ value="all"
70
+ )
71
+ ],
72
+ outputs=gr.Textbox(label="Expert Response"),
73
+ title="Multi-Dataset Knowledge Base",
74
+ description="Search across all RagBench datasets for comprehensive information",
75
  examples=[
76
+ ["What role does T-cell count play in severe human adenovirus type 55 (HAdV-55) infection?", "covidqa"],
77
+ ["In what school district is Governor John R. Rogers High School located?", "hotpotqa"],
78
+ ["What are the key financial metrics for Q3?", "finqa"]
79
  ]
80
  )
81
 
82
  if __name__ == "__main__":
83
  demo.launch(debug=True)