add_poc_french_local_insights

#18
by timeki - opened
.gitignore CHANGED
@@ -11,3 +11,5 @@ notebooks/
11
 
12
  data/
13
  sandbox/
 
 
 
11
 
12
  data/
13
  sandbox/
14
+
15
+ *.db
app.py CHANGED
@@ -1,54 +1,30 @@
1
- from climateqa.engine.embeddings import get_embeddings_function
2
- embeddings_function = get_embeddings_function()
3
-
4
- from sentence_transformers import CrossEncoder
5
-
6
- # reranker = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
7
-
8
- import gradio as gr
9
- from gradio_modal import Modal
10
- import pandas as pd
11
- import numpy as np
12
  import os
13
- import time
14
- import re
15
- import json
16
-
17
- from gradio import ChatMessage
18
-
19
- # from gradio_modal import Modal
20
-
21
- from io import BytesIO
22
- import base64
23
 
24
- from datetime import datetime
25
  from azure.storage.fileshare import ShareServiceClient
26
 
27
- from utils import create_user_id
28
-
29
- from gradio_modal import Modal
30
-
31
- from PIL import Image
32
-
33
- from langchain_core.runnables.schema import StreamEvent
34
-
35
- # ClimateQ&A imports
36
  from climateqa.engine.llm import get_llm
37
  from climateqa.engine.vectorstore import get_pinecone_vectorstore
38
- # from climateqa.knowledge.retriever import ClimateQARetriever
39
  from climateqa.engine.reranker import get_reranker
40
- from climateqa.engine.embeddings import get_embeddings_function
41
- from climateqa.engine.chains.prompts import audience_prompts
42
- from climateqa.sample_questions import QUESTIONS
43
- from climateqa.constants import POSSIBLE_REPORTS, OWID_CATEGORIES
44
- from climateqa.utils import get_image_from_azure_blob_storage
45
- from climateqa.engine.graph import make_graph_agent
46
- from climateqa.engine.embeddings import get_embeddings_function
47
  from climateqa.engine.chains.retrieve_papers import find_papers
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- from front.utils import serialize_docs,process_figures
50
 
51
- from climateqa.event_handler import init_audience, handle_retrieved_documents, stream_answer,handle_retrieved_owid_graphs
52
 
53
  # Load environment variables in local mode
54
  try:
@@ -57,7 +33,6 @@ try:
57
  except Exception as e:
58
  pass
59
 
60
- import requests
61
 
62
  # Set up Gradio Theme
63
  theme = gr.themes.Base(
@@ -66,15 +41,7 @@ theme = gr.themes.Base(
66
  font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
67
  )
68
 
69
-
70
-
71
- init_prompt = ""
72
-
73
- system_template = {
74
- "role": "system",
75
- "content": init_prompt,
76
- }
77
-
78
  account_key = os.environ["BLOB_ACCOUNT_KEY"]
79
  if len(account_key) == 86:
80
  account_key += "=="
@@ -92,586 +59,236 @@ share_client = service.get_share_client(file_share_name)
92
  user_id = create_user_id()
93
 
94
 
95
- CITATION_LABEL = "BibTeX citation for ClimateQ&A"
96
- CITATION_TEXT = r"""@misc{climateqa,
97
- author={Théo Alves Da Costa, Timothée Bohe},
98
- title={ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
99
- year={2024},
100
- howpublished= {\url{https://climateqa.com}},
101
- }
102
- @software{climateqa,
103
- author = {Théo Alves Da Costa, Timothée Bohe},
104
- publisher = {ClimateQ&A},
105
- title = {ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
106
- }
107
- """
108
-
109
-
110
 
111
  # Create vectorstore and retriever
112
- vectorstore = get_pinecone_vectorstore(embeddings_function, index_name = os.getenv("PINECONE_API_INDEX"))
113
- vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name = os.getenv("PINECONE_API_INDEX_OWID"), text_key="description")
 
 
114
 
115
  llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
116
- reranker = get_reranker("nano")
117
-
118
- agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, reranker=reranker)
119
-
120
- def update_config_modal_visibility(config_open):
121
- new_config_visibility_status = not config_open
122
- return gr.update(visible=new_config_visibility_status), new_config_visibility_status
123
-
124
- async def chat(query, history, audience, sources, reports, relevant_content_sources, search_only):
125
- """taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
126
- (messages in gradio format, messages in langchain format, source documents)"""
127
-
128
- date_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
129
- print(f">> NEW QUESTION ({date_now}) : {query}")
130
-
131
- audience_prompt = init_audience(audience)
132
-
133
- # Prepare default values
134
- if sources is None or len(sources) == 0:
135
- sources = ["IPCC", "IPBES", "IPOS"]
136
-
137
- if reports is None or len(reports) == 0:
138
- reports = []
139
-
140
- inputs = {"user_input": query,"audience": audience_prompt,"sources_input":sources, "relevant_content_sources" : relevant_content_sources, "search_only": search_only}
141
- result = agent.astream_events(inputs,version = "v1")
142
-
143
-
144
- docs = []
145
- used_figures=[]
146
- related_contents = []
147
- docs_html = ""
148
- output_query = ""
149
- output_language = ""
150
- output_keywords = ""
151
- start_streaming = False
152
- graphs_html = ""
153
- figures = '<div class="figures-container"><p></p> </div>'
154
-
155
- steps_display = {
156
- "categorize_intent":("🔄️ Analyzing user message",True),
157
- "transform_query":("🔄️ Thinking step by step to answer the question",True),
158
- "retrieve_documents":("🔄️ Searching in the knowledge base",False),
159
- }
160
-
161
- used_documents = []
162
- answer_message_content = ""
163
- try:
164
- async for event in result:
165
- if "langgraph_node" in event["metadata"]:
166
- node = event["metadata"]["langgraph_node"]
167
-
168
- if event["event"] == "on_chain_end" and event["name"] == "retrieve_documents" :# when documents are retrieved
169
- docs, docs_html, history, used_documents, related_contents = handle_retrieved_documents(event, history, used_documents)
170
-
171
- elif event["event"] == "on_chain_end" and node == "categorize_intent" and event["name"] == "_write": # when the query is transformed
172
-
173
- intent = event["data"]["output"]["intent"]
174
- if "language" in event["data"]["output"]:
175
- output_language = event["data"]["output"]["language"]
176
- else :
177
- output_language = "English"
178
- history[-1].content = f"Language identified : {output_language} \n Intent identified : {intent}"
179
-
180
-
181
- elif event["name"] in steps_display.keys() and event["event"] == "on_chain_start": #display steps
182
- event_description, display_output = steps_display[node]
183
- if not hasattr(history[-1], 'metadata') or history[-1].metadata["title"] != event_description: # if a new step begins
184
- history.append(ChatMessage(role="assistant", content = "", metadata={'title' :event_description}))
185
-
186
- elif event["name"] != "transform_query" and event["event"] == "on_chat_model_stream" and node in ["answer_rag", "answer_search","answer_chitchat"]:# if streaming answer
187
- history, start_streaming, answer_message_content = stream_answer(history, event, start_streaming, answer_message_content)
188
-
189
- elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
190
- graphs_html = handle_retrieved_owid_graphs(event, graphs_html)
191
-
192
-
193
- if event["name"] == "transform_query" and event["event"] =="on_chain_end":
194
- if hasattr(history[-1],"content"):
195
- history[-1].content += "Decompose question into sub-questions: \n\n - " + "\n - ".join([q["question"] for q in event["data"]["output"]["remaining_questions"]])
196
-
197
- if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
198
- print("X")
199
-
200
- yield history, docs_html, output_query, output_language, related_contents , graphs_html, #,output_query,output_keywords
201
-
202
- except Exception as e:
203
- print(event, "has failed")
204
- raise gr.Error(f"{e}")
205
-
206
-
207
- try:
208
- # Log answer on Azure Blob Storage
209
- if os.getenv("GRADIO_ENV") != "local":
210
- timestamp = str(datetime.now().timestamp())
211
- file = timestamp + ".json"
212
- prompt = history[1]["content"]
213
- logs = {
214
- "user_id": str(user_id),
215
- "prompt": prompt,
216
- "query": prompt,
217
- "question":output_query,
218
- "sources":sources,
219
- "docs":serialize_docs(docs),
220
- "answer": history[-1].content,
221
- "time": timestamp,
222
- }
223
- log_on_azure(file, logs, share_client)
224
- except Exception as e:
225
- print(f"Error logging on Azure Blob Storage: {e}")
226
- raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
227
-
228
- yield history, docs_html, output_query, output_language, related_contents, graphs_html
229
-
230
-
231
- def save_feedback(feed: str, user_id):
232
- if len(feed) > 1:
233
- timestamp = str(datetime.now().timestamp())
234
- file = user_id + timestamp + ".json"
235
- logs = {
236
- "user_id": user_id,
237
- "feedback": feed,
238
- "time": timestamp,
239
- }
240
- log_on_azure(file, logs, share_client)
241
- return "Feedback submitted, thank you!"
242
-
243
-
244
-
245
-
246
- def log_on_azure(file, logs, share_client):
247
- logs = json.dumps(logs)
248
- file_client = share_client.get_file_client(file)
249
- file_client.upload_file(logs)
250
 
 
 
251
 
252
 
 
 
 
 
 
 
 
 
 
253
 
254
 
255
  # --------------------------------------------------------------------
256
  # Gradio
257
  # --------------------------------------------------------------------
258
 
 
 
 
 
 
259
 
260
- init_prompt = """
261
- Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
262
-
263
- How to use
264
- - **Language**: You can ask me your questions in any language.
265
- - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
266
- - **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
267
- - **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.
268
-
269
- ⚠️ Limitations
270
- *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
271
-
272
- 🛈 Information
273
- Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
274
-
275
-
276
- What do you want to learn ?
277
- """
278
-
279
-
280
- def vote(data: gr.LikeData):
281
- if data.liked:
282
- print(data.value)
283
- else:
284
- print(data)
285
-
286
- def save_graph(saved_graphs_state, embedding, category):
287
- print(f"\nCategory:\n{saved_graphs_state}\n")
288
- if category not in saved_graphs_state:
289
- saved_graphs_state[category] = []
290
- if embedding not in saved_graphs_state[category]:
291
- saved_graphs_state[category].append(embedding)
292
- return saved_graphs_state, gr.Button("Graph Saved")
293
 
 
294
 
295
 
296
- with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=theme,elem_id = "main-component") as demo:
297
- chat_completed_state = gr.State(0)
 
298
  current_graphs = gr.State([])
299
- saved_graphs = gr.State({})
300
- config_open = gr.State(False)
301
-
302
-
303
- with gr.Tab("ClimateQ&A"):
304
-
305
  with gr.Row(elem_id="chatbot-row"):
 
306
  with gr.Column(scale=2):
307
- chatbot = gr.Chatbot(
308
- value = [ChatMessage(role="assistant", content=init_prompt)],
309
- type = "messages",
310
- show_copy_button=True,
311
- show_label = False,
312
- elem_id="chatbot",
313
- layout = "panel",
314
- avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
315
- max_height="80vh",
316
- height="100vh"
317
- )
318
-
319
- # bot.like(vote,None,None)
320
-
321
-
322
-
323
- with gr.Row(elem_id = "input-message"):
324
- textbox=gr.Textbox(placeholder="Ask me anything here!",show_label=False,scale=7,lines = 1,interactive = True,elem_id="input-textbox")
325
-
326
- config_button = gr.Button("",elem_id="config-button")
327
- # config_checkbox_button = gr.Checkbox(label = '⚙️', value="show",visible=True, interactive=True, elem_id="checkbox-config")
328
-
329
-
330
-
331
- with gr.Column(scale=2, variant="panel",elem_id = "right-panel"):
332
-
333
-
334
- with gr.Tabs(elem_id = "right_panel_tab") as tabs:
335
- with gr.TabItem("Examples",elem_id = "tab-examples",id = 0):
336
-
337
- examples_hidden = gr.Textbox(visible = False)
338
- first_key = list(QUESTIONS.keys())[0]
339
- dropdown_samples = gr.Dropdown(QUESTIONS.keys(),value = first_key,interactive = True,show_label = True,label = "Select a category of sample questions",elem_id = "dropdown-samples")
340
-
341
- samples = []
342
- for i,key in enumerate(QUESTIONS.keys()):
343
-
344
- examples_visible = True if i == 0 else False
345
-
346
- with gr.Row(visible = examples_visible) as group_examples:
347
-
348
- examples_questions = gr.Examples(
349
- QUESTIONS[key],
350
- [examples_hidden],
351
- examples_per_page=8,
352
- run_on_click=False,
353
- elem_id=f"examples{i}",
354
- api_name=f"examples{i}",
355
- # label = "Click on the example question or enter your own",
356
- # cache_examples=True,
357
- )
358
-
359
- samples.append(group_examples)
360
-
361
- # with gr.Tab("Configuration", id = 10, ) as tab_config:
362
- # # gr.Markdown("Reminders: You can talk in any language, ClimateQ&A is multi-lingual!")
363
-
364
- # pass
365
-
366
- # with gr.Row():
367
-
368
- # dropdown_sources = gr.CheckboxGroup(
369
- # ["IPCC", "IPBES","IPOS"],
370
- # label="Select source",
371
- # value=["IPCC"],
372
- # interactive=True,
373
- # )
374
- # dropdown_external_sources = gr.CheckboxGroup(
375
- # ["IPCC figures","OpenAlex", "OurWorldInData"],
376
- # label="Select database to search for relevant content",
377
- # value=["IPCC figures"],
378
- # interactive=True,
379
- # )
380
-
381
- # dropdown_reports = gr.Dropdown(
382
- # POSSIBLE_REPORTS,
383
- # label="Or select specific reports",
384
- # multiselect=True,
385
- # value=None,
386
- # interactive=True,
387
- # )
388
-
389
- # search_only = gr.Checkbox(label="Search only without chating", value=False, interactive=True, elem_id="checkbox-chat")
390
-
391
-
392
- # dropdown_audience = gr.Dropdown(
393
- # ["Children","General public","Experts"],
394
- # label="Select audience",
395
- # value="Experts",
396
- # interactive=True,
397
- # )
398
-
399
-
400
- # after = gr.Slider(minimum=1950,maximum=2023,step=1,value=1960,label="Publication date",show_label=True,interactive=True,elem_id="date-papers", visible=False)
401
-
402
 
403
- # output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False, visible= False)
404
- # output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False, visible= False)
 
 
 
 
405
 
 
 
 
406
 
407
- # dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after])
408
- # # dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after], visible=True)
409
 
 
 
 
 
 
 
410
 
411
- with gr.Tab("Sources",elem_id = "tab-sources",id = 1) as tab_sources:
412
- sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
413
-
414
-
415
-
416
- with gr.Tab("Recommended content", elem_id="tab-recommended_content",id=2) as tab_recommended_content:
417
- with gr.Tabs(elem_id = "group-subtabs") as tabs_recommended_content:
418
-
419
- with gr.Tab("Figures",elem_id = "tab-figures",id = 3) as tab_figures:
420
- sources_raw = gr.State()
421
-
422
- with Modal(visible=False, elem_id="modal_figure_galery") as figure_modal:
423
- gallery_component = gr.Gallery(object_fit='scale-down',elem_id="gallery-component", height="80vh")
424
-
425
- show_full_size_figures = gr.Button("Show figures in full size",elem_id="show-figures",interactive=True)
426
- show_full_size_figures.click(lambda : Modal(visible=True),None,figure_modal)
427
-
428
- figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
429
-
430
-
431
-
432
- with gr.Tab("Papers",elem_id = "tab-citations",id = 4) as tab_papers:
433
- # btn_summary = gr.Button("Summary")
434
- # Fenêtre simulée pour le Summary
435
- with gr.Accordion(visible=True, elem_id="papers-summary-popup", label= "See summary of relevant papers", open= False) as summary_popup:
436
- papers_summary = gr.Markdown("", visible=True, elem_id="papers-summary")
437
-
438
- # btn_relevant_papers = gr.Button("Relevant papers")
439
- # Fenêtre simulée pour les Relevant Papers
440
- with gr.Accordion(visible=True, elem_id="papers-relevant-popup",label= "See relevant papers", open= False) as relevant_popup:
441
- papers_html = gr.HTML(show_label=False, elem_id="papers-textbox")
442
-
443
- btn_citations_network = gr.Button("Explore papers citations network")
444
- # Fenêtre simulée pour le Citations Network
445
- with Modal(visible=False) as papers_modal:
446
- citations_network = gr.HTML("<h3>Citations Network Graph</h3>", visible=True, elem_id="papers-citations-network")
447
- btn_citations_network.click(lambda: Modal(visible=True), None, papers_modal)
448
-
449
-
450
-
451
  with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
 
453
- graphs_container = gr.HTML("<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",elem_id="graphs-container")
454
- current_graphs.change(lambda x : x, inputs=[current_graphs], outputs=[graphs_container])
455
-
456
- with Modal(visible=False,elem_id="modal-config") as config_modal:
457
- gr.Markdown("Reminders: You can talk in any language, ClimateQ&A is multi-lingual!")
458
-
459
-
460
- # with gr.Row():
461
-
462
- dropdown_sources = gr.CheckboxGroup(
463
- ["IPCC", "IPBES","IPOS"],
464
- label="Select source (by default search in all sources)",
465
- value=["IPCC"],
466
- interactive=True,
467
- )
468
-
469
- dropdown_reports = gr.Dropdown(
470
- POSSIBLE_REPORTS,
471
- label="Or select specific reports",
472
- multiselect=True,
473
- value=None,
474
- interactive=True,
475
- )
476
-
477
- dropdown_external_sources = gr.CheckboxGroup(
478
- ["IPCC figures","OpenAlex", "OurWorldInData"],
479
- label="Select database to search for relevant content",
480
- value=["IPCC figures"],
481
- interactive=True,
482
- )
483
-
484
- search_only = gr.Checkbox(label="Search only for recommended content without chating", value=False, interactive=True, elem_id="checkbox-chat")
485
-
486
-
487
- dropdown_audience = gr.Dropdown(
488
- ["Children","General public","Experts"],
489
- label="Select audience",
490
- value="Experts",
491
- interactive=True,
492
- )
493
-
494
-
495
- after = gr.Slider(minimum=1950,maximum=2023,step=1,value=1960,label="Publication date",show_label=True,interactive=True,elem_id="date-papers", visible=False)
496
-
497
-
498
- output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False, visible= False)
499
- output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False, visible= False)
500
-
501
-
502
- dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after])
503
-
504
- close_config_modal = gr.Button("Validate and Close",elem_id="close-config-modal")
505
- close_config_modal.click(fn=update_config_modal_visibility, inputs=[config_open], outputs=[config_modal, config_open])
506
- # dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after], visible=True)
507
-
508
-
509
-
510
- config_button.click(fn=update_config_modal_visibility, inputs=[config_open], outputs=[config_modal, config_open])
511
-
512
- # with gr.Tab("OECD",elem_id = "tab-oecd",id = 6):
513
- # oecd_indicator = "RIVER_FLOOD_RP100_POP_SH"
514
- # oecd_topic = "climate"
515
- # oecd_latitude = "46.8332"
516
- # oecd_longitude = "5.3725"
517
- # oecd_zoom = "5.6442"
518
- # # Create the HTML content with the iframe
519
- # iframe_html = f"""
520
- # <iframe src="https://localdataportal.oecd.org/maps.html?indicator={oecd_indicator}&topic={oecd_topic}&latitude={oecd_latitude}&longitude={oecd_longitude}&zoom={oecd_zoom}"
521
- # width="100%" height="600" frameborder="0" style="border:0;" allowfullscreen></iframe>
522
- # """
523
- # oecd_textbox = gr.HTML(iframe_html, show_label=False, elem_id="oecd-textbox")
524
-
525
-
526
-
527
-
528
- #---------------------------------------------------------------------------------------
529
- # OTHER TABS
530
- #---------------------------------------------------------------------------------------
531
-
532
- # with gr.Tab("Settings",elem_id = "tab-config",id = 2):
533
-
534
- # gr.Markdown("Reminder: You can talk in any language, ClimateQ&A is multi-lingual!")
535
-
536
-
537
- # dropdown_sources = gr.CheckboxGroup(
538
- # ["IPCC", "IPBES","IPOS", "OpenAlex"],
539
- # label="Select source",
540
- # value=["IPCC"],
541
- # interactive=True,
542
- # )
543
-
544
- # dropdown_reports = gr.Dropdown(
545
- # POSSIBLE_REPORTS,
546
- # label="Or select specific reports",
547
- # multiselect=True,
548
- # value=None,
549
- # interactive=True,
550
- # )
551
-
552
- # dropdown_audience = gr.Dropdown(
553
- # ["Children","General public","Experts"],
554
- # label="Select audience",
555
- # value="Experts",
556
- # interactive=True,
557
- # )
558
-
559
-
560
- # output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False)
561
- # output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
562
-
563
-
564
- with gr.Tab("About",elem_classes = "max-height other-tabs"):
565
- with gr.Row():
566
- with gr.Column(scale=1):
567
-
568
-
569
-
570
-
571
- gr.Markdown(
572
- """
573
- ### More info
574
- - See more info at [https://climateqa.com](https://climateqa.com/docs/intro/)
575
- - Feedbacks on this [form](https://forms.office.com/e/1Yzgxm6jbp)
576
-
577
- ### Citation
578
- """
579
- )
580
- with gr.Accordion(CITATION_LABEL,elem_id="citation", open = False,):
581
- # # Display citation label and text)
582
- gr.Textbox(
583
- value=CITATION_TEXT,
584
- label="",
585
- interactive=False,
586
- show_copy_button=True,
587
- lines=len(CITATION_TEXT.split('\n')),
588
- )
589
-
590
-
591
-
592
- def start_chat(query,history,search_only):
593
- history = history + [ChatMessage(role="user", content=query)]
594
- if not search_only:
595
- return (gr.update(interactive = False),gr.update(selected=1),history)
596
- else:
597
- return (gr.update(interactive = False),gr.update(selected=2),history)
598
 
599
- def finish_chat():
600
- return gr.update(interactive = True,value = "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
 
602
- # Initialize visibility states
603
- summary_visible = False
604
- relevant_visible = False
605
-
606
- # Functions to toggle visibility
607
- def toggle_summary_visibility():
608
- global summary_visible
609
- summary_visible = not summary_visible
610
- return gr.update(visible=summary_visible)
611
-
612
- def toggle_relevant_visibility():
613
- global relevant_visible
614
- relevant_visible = not relevant_visible
615
- return gr.update(visible=relevant_visible)
616
-
617
-
618
- def change_completion_status(current_state):
619
- current_state = 1 - current_state
620
- return current_state
621
 
622
- def update_sources_number_display(sources_textbox, figures_cards, current_graphs, papers_html):
623
- sources_number = sources_textbox.count("<h2>")
624
- figures_number = figures_cards.count("<h2>")
625
- graphs_number = current_graphs.count("<iframe")
626
- papers_number = papers_html.count("<h2>")
627
- sources_notif_label = f"Sources ({sources_number})"
628
- figures_notif_label = f"Figures ({figures_number})"
629
- graphs_notif_label = f"Graphs ({graphs_number})"
630
- papers_notif_label = f"Papers ({papers_number})"
631
- recommended_content_notif_label = f"Recommended content ({figures_number + graphs_number + papers_number})"
632
-
633
- return gr.update(label = recommended_content_notif_label), gr.update(label = sources_notif_label), gr.update(label = figures_notif_label), gr.update(label = graphs_notif_label), gr.update(label = papers_notif_label)
634
 
635
- (textbox
636
- .submit(start_chat, [textbox,chatbot, search_only], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
637
- .then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, dropdown_external_sources, search_only] ,[chatbot,sources_textbox,output_query,output_language, sources_raw, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
638
- .then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
639
- # .then(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_sources, tab_figures, tab_graphs, tab_papers] )
640
- )
641
-
642
- (examples_hidden
643
- .change(start_chat, [examples_hidden,chatbot, search_only], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
644
- .then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, dropdown_external_sources, search_only] ,[chatbot,sources_textbox,output_query,output_language, sources_raw, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
645
- .then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
646
- # .then(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_sources, tab_figures, tab_graphs, tab_papers] )
647
- )
648
-
649
-
650
- def change_sample_questions(key):
651
- index = list(QUESTIONS.keys()).index(key)
652
- visible_bools = [False] * len(samples)
653
- visible_bools[index] = True
654
- return [gr.update(visible=visible_bools[i]) for i in range(len(samples))]
655
-
656
-
657
- sources_raw.change(process_figures, inputs=[sources_raw], outputs=[figures_cards, gallery_component])
658
 
659
- # update sources numbers
660
- sources_textbox.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
661
- figures_cards.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
662
- current_graphs.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
663
- papers_html.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
664
-
665
- # other questions examples
666
- dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667
 
668
- # search for papers
669
- textbox.submit(find_papers,[textbox,after, dropdown_external_sources], [papers_html,citations_network,papers_summary])
670
- examples_hidden.change(find_papers,[examples_hidden,after,dropdown_external_sources], [papers_html,citations_network,papers_summary])
 
 
 
 
 
 
671
 
672
- # btn_summary.click(toggle_summary_visibility, outputs=summary_popup)
673
- # btn_relevant_papers.click(toggle_relevant_visibility, outputs=relevant_popup)
674
 
675
- demo.queue()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
676
 
 
 
677
  demo.launch(ssr_mode=False)
 
1
+ # Import necessary libraries
 
 
 
 
 
 
 
 
 
 
2
  import os
3
+ import gradio as gr
 
 
 
 
 
 
 
 
 
4
 
 
5
  from azure.storage.fileshare import ShareServiceClient
6
 
7
+ # Import custom modules
8
+ from climateqa.engine.embeddings import get_embeddings_function
 
 
 
 
 
 
 
9
  from climateqa.engine.llm import get_llm
10
  from climateqa.engine.vectorstore import get_pinecone_vectorstore
 
11
  from climateqa.engine.reranker import get_reranker
12
+ from climateqa.engine.graph import make_graph_agent,make_graph_agent_poc
 
 
 
 
 
 
13
  from climateqa.engine.chains.retrieve_papers import find_papers
14
+ from climateqa.chat import start_chat, chat_stream, finish_chat
15
+
16
+ from front.tabs import (create_config_modal, create_examples_tab, create_papers_tab, create_figures_tab, create_chat_interface, create_about_tab)
17
+ from front.utils import process_figures
18
+
19
+
20
+ from utils import create_user_id
21
+ import logging
22
+
23
+ logging.basicConfig(level=logging.WARNING)
24
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppresses INFO and WARNING logs
25
+ logging.getLogger().setLevel(logging.WARNING)
26
 
 
27
 
 
28
 
29
  # Load environment variables in local mode
30
  try:
 
33
  except Exception as e:
34
  pass
35
 
 
36
 
37
  # Set up Gradio Theme
38
  theme = gr.themes.Base(
 
41
  font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
42
  )
43
 
44
+ # Azure Blob Storage credentials
 
 
 
 
 
 
 
 
45
  account_key = os.environ["BLOB_ACCOUNT_KEY"]
46
  if len(account_key) == 86:
47
  account_key += "=="
 
59
  user_id = create_user_id()
60
 
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  # Create vectorstore and retriever
64
+ embeddings_function = get_embeddings_function()
65
+ vectorstore = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX"))
66
+ vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_OWID"), text_key="description")
67
+ vectorstore_region = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_REGION"))
68
 
69
  llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
70
# Pick the reranker size from the ENV environment variable: "nano" when it
# equals "GRADIO_ENV", "large" for any other value (including when unset).
reranker_size = "nano" if os.getenv("ENV") == "GRADIO_ENV" else "large"
reranker = get_reranker(reranker_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0.2)
76
+ agent_poc = make_graph_agent_poc(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0)#TODO put back default 0.2
77
 
78
 
79
async def chat(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
    """Stream chat updates produced with the module-level ``agent``.

    Every argument is forwarded unchanged to ``chat_stream``, together with
    the module-level ``agent``, ``share_client`` and ``user_id``. Each item
    that ``chat_stream`` yields is re-yielded as-is.
    """
    print("chat cqa - message received")
    updates = chat_stream(
        agent,
        query,
        history,
        audience,
        sources,
        reports,
        relevant_content_sources_selection,
        search_only,
        share_client,
        user_id,
    )
    async for update in updates:
        yield update
83
+
84
async def chat_poc(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
    """Stream chat updates produced with the module-level ``agent_poc``.

    Same contract as a direct call to ``chat_stream``: all arguments are
    forwarded unchanged, along with the module-level ``agent_poc``,
    ``share_client`` and ``user_id``, and every yielded item is passed
    through untouched.
    """
    print("chat poc - message received")
    updates = chat_stream(
        agent_poc,
        query,
        history,
        audience,
        sources,
        reports,
        relevant_content_sources_selection,
        search_only,
        share_client,
        user_id,
    )
    async for update in updates:
        yield update
88
 
89
 
90
  # --------------------------------------------------------------------
91
  # Gradio
92
  # --------------------------------------------------------------------
93
 
94
+ # Function to update modal visibility
95
def update_config_modal_visibility(config_open):
    """Toggle the configuration modal.

    Args:
        config_open: Current open/closed flag of the modal.

    Returns:
        A pair ``(gradio update setting the modal's visibility, new flag)``,
        where the new flag is the negation of ``config_open``.
    """
    toggled = not config_open
    modal_update = gr.update(visible=toggled)
    return modal_update, toggled
98
+
99
 
100
def update_sources_number_display(sources_textbox, figures_cards, current_graphs, papers_html):
    """Build tab-label updates showing how many items each panel contains.

    Items are counted by occurrences of a marker in each value: ``<h2>`` for
    sources, figures and papers, ``<iframe`` for graphs.

    Returns:
        Five Gradio updates, in this order: recommended content (figures +
        graphs + papers), sources, figures, graphs, papers.
    """
    n_sources = sources_textbox.count("<h2>")
    n_figures = figures_cards.count("<h2>")
    n_graphs = current_graphs.count("<iframe")
    n_papers = papers_html.count("<h2>")

    labels = (
        f"Recommended content ({n_figures + n_graphs + n_papers})",
        f"Sources ({n_sources})",
        f"Figures ({n_figures})",
        f"Graphs ({n_graphs})",
        f"Papers ({n_papers})",
    )
    return tuple(gr.update(label=label) for label in labels)
112
 
113
 
114
+ # # UI Layout Components
115
def cqa_tab(tab_name):
    """Build one chat tab and return the components that need event wiring.

    The tab holds a chat column on the left and, on the right, a panel with
    "Examples", "Sources" and "Recommended content" tabs; the latter contains
    "Figures", "Papers" and "Graphs" sub-tabs.

    Args:
        tab_name: Label of the top-level tab.

    Returns:
        dict mapping component names to the created Gradio components.
    """
    # Per-tab state, created before the tab layout is opened.
    current_graphs = gr.State([])

    with gr.Tab(tab_name):
        with gr.Row(elem_id="chatbot-row"):
            # Left column: chat interface
            with gr.Column(scale=2):
                chatbot, textbox, config_button = create_chat_interface()

            # Right column: content panels
            with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
                with gr.Tabs(elem_id="right_panel_tab") as tabs:
                    with gr.TabItem("Examples", elem_id="tab-examples", id=0):
                        examples_hidden = create_examples_tab()

                    with gr.Tab("Sources", elem_id="tab-sources", id=1) as tab_sources:
                        sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")

                    with gr.Tab("Recommended content", elem_id="tab-recommended_content", id=2) as tab_recommended_content:
                        with gr.Tabs(elem_id="group-subtabs") as tabs_recommended_content:
                            with gr.Tab("Figures", elem_id="tab-figures", id=3) as tab_figures:
                                (
                                    sources_raw,
                                    new_figures,
                                    used_figures,
                                    gallery_component,
                                    figures_cards,
                                    figure_modal,
                                ) = create_figures_tab()

                            with gr.Tab("Papers", elem_id="tab-citations", id=4) as tab_papers:
                                (
                                    papers_summary,
                                    papers_html,
                                    citations_network,
                                    papers_modal,
                                ) = create_papers_tab()

                            with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
                                graphs_container = gr.HTML(
                                    "<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",
                                    elem_id="graphs-container"
                                )

    # Note: the graphs container is exposed under the key "graph_container" (singular).
    return dict(
        chatbot=chatbot,
        textbox=textbox,
        tabs=tabs,
        sources_raw=sources_raw,
        new_figures=new_figures,
        current_graphs=current_graphs,
        examples_hidden=examples_hidden,
        sources_textbox=sources_textbox,
        figures_cards=figures_cards,
        gallery_component=gallery_component,
        config_button=config_button,
        papers_html=papers_html,
        citations_network=citations_network,
        papers_summary=papers_summary,
        tab_recommended_content=tab_recommended_content,
        tab_sources=tab_sources,
        tab_figures=tab_figures,
        tab_graphs=tab_graphs,
        tab_papers=tab_papers,
        graph_container=graphs_container,
    )
175
 
176
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
def event_handling(
    main_tab_components,
    config_components,
    tab_name="ClimateQ&A"
):
    """Wire the Gradio event listeners of one chat tab.

    Args:
        main_tab_components: dict of components as returned by ``cqa_tab``.
        config_components: dict of components of the configuration modal.
        tab_name: Name of the tab; selects which chat callback is used
            (``chat`` for "ClimateQ&A", ``chat_poc`` for
            "Beta - POC Adapt'Action"). For any other name no chat listener
            is registered.
    """
    chatbot = main_tab_components["chatbot"]
    textbox = main_tab_components["textbox"]
    tabs = main_tab_components["tabs"]
    sources_raw = main_tab_components["sources_raw"]
    new_figures = main_tab_components["new_figures"]
    current_graphs = main_tab_components["current_graphs"]
    examples_hidden = main_tab_components["examples_hidden"]
    sources_textbox = main_tab_components["sources_textbox"]
    figures_cards = main_tab_components["figures_cards"]
    gallery_component = main_tab_components["gallery_component"]
    config_button = main_tab_components["config_button"]
    papers_html = main_tab_components["papers_html"]
    citations_network = main_tab_components["citations_network"]
    papers_summary = main_tab_components["papers_summary"]
    tab_recommended_content = main_tab_components["tab_recommended_content"]
    tab_sources = main_tab_components["tab_sources"]
    tab_figures = main_tab_components["tab_figures"]
    tab_graphs = main_tab_components["tab_graphs"]
    tab_papers = main_tab_components["tab_papers"]
    graphs_container = main_tab_components["graph_container"]

    config_open = config_components["config_open"]
    config_modal = config_components["config_modal"]
    dropdown_sources = config_components["dropdown_sources"]
    dropdown_reports = config_components["dropdown_reports"]
    dropdown_external_sources = config_components["dropdown_external_sources"]
    search_only = config_components["search_only"]
    dropdown_audience = config_components["dropdown_audience"]
    after = config_components["after"]
    output_query = config_components["output_query"]
    output_language = config_components["output_language"]
    close_config_modal = config_components["close_config_modal_button"]

    new_sources_html = gr.State([])

    print("textbox id : ", textbox.elem_id)

    # Both the open and the close buttons toggle the configuration modal.
    for button in [config_button, close_config_modal]:
        button.click(
            fn=update_config_modal_visibility,
            inputs=[config_open],
            outputs=[config_modal, config_open]
        )

    # Each question source paired with the event that signals a new question:
    # the textbox is submitted by the user, the hidden examples field changes value.
    question_triggers = [
        (textbox, textbox.submit),
        (examples_hidden, examples_hidden.change),
    ]

    # Chat callback (and its log line) per supported tab name.
    chat_functions = {
        "ClimateQ&A": ("chat cqa - message sent", chat),
        "Beta - POC Adapt'Action": ("chat poc - message sent", chat_poc),
    }

    if tab_name in chat_functions:
        log_message, chat_fn = chat_functions[tab_name]
        print(log_message)

        chat_outputs = [chatbot, new_sources_html, output_query, output_language, new_figures, current_graphs]
        for source, trigger in question_triggers:
            chat_inputs = [source, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only]
            (
                trigger(start_chat, [source, chatbot, search_only], [source, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{source.elem_id}")
                .then(chat_fn, chat_inputs, chat_outputs, concurrency_limit=8, api_name=f"chat_{source.elem_id}")
                # Whatever the source, it is the textbox that is re-enabled at the end.
                .then(finish_chat, None, [textbox], api_name=f"finish_chat_{source.elem_id}")
            )

    # Mirror state values into the components that display them.
    new_sources_html.change(lambda x : x, inputs = [new_sources_html], outputs = [sources_textbox])
    current_graphs.change(lambda x: x, inputs=[current_graphs], outputs=[graphs_container])
    new_figures.change(process_figures, inputs=[sources_raw, new_figures], outputs=[sources_raw, figures_cards, gallery_component])

    # Update sources numbers
    for component in [sources_textbox, figures_cards, current_graphs, papers_html]:
        component.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs, papers_html], [tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])

    # Search for papers. The same trigger as for the chat is used per source:
    # registering `.submit` on the hidden examples field (as before) would not
    # react to its value being changed.
    for source, trigger in question_triggers:
        trigger(find_papers, [source, after, dropdown_external_sources], [papers_html, citations_network, papers_summary])
270
+
271
+
272
 
 
 
273
 
274
def main_ui():
    """Assemble the whole Gradio application and return the ``gr.Blocks`` object.

    Creates the configuration modal, the two chat tabs and the about tab,
    wires the events of each chat tab, then enables the queue.
    """
    # config_open = gr.State(True)
    stylesheet = os.getcwd()+ "/style.css"
    with gr.Blocks(title="Climate Q&A", css_paths=stylesheet, theme=theme, elem_id="main-component") as demo:
        config_components = create_config_modal()

        with gr.Tabs():
            cqa_components = cqa_tab(tab_name="ClimateQ&A")
            local_cqa_components = cqa_tab(tab_name="Beta - POC Adapt'Action")

            create_about_tab()

        # Listeners are registered once all tabs exist, one tab after the other.
        tabs_to_wire = (
            (cqa_components, "ClimateQ&A"),
            (local_cqa_components, "Beta - POC Adapt'Action"),
        )
        for components, name in tabs_to_wire:
            event_handling(components, config_components, tab_name=name)

        demo.queue()

    return demo
291
 
292
+
293
+ demo = main_ui()
294
  demo.launch(ssr_mode=False)
climateqa/chat.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime
3
+ import gradio as gr
4
+ # from .agent import agent
5
+ from gradio import ChatMessage
6
+ from langgraph.graph.state import CompiledStateGraph
7
+ import json
8
+
9
+ from .handle_stream_events import (
10
+ init_audience,
11
+ handle_retrieved_documents,
12
+ convert_to_docs_to_html,
13
+ stream_answer,
14
+ handle_retrieved_owid_graphs,
15
+ serialize_docs,
16
+ )
17
+
18
+ # Function to log data on Azure
19
def log_on_azure(file, logs, share_client):
    """Serialize ``logs`` to JSON and upload it as ``file`` through ``share_client``.

    Args:
        file: Name of the target file, passed to ``share_client.get_file_client``.
        logs: JSON-serializable object to upload.
        share_client: Object exposing ``get_file_client(name)``, whose result
            exposes ``upload_file(data)``.
    """
    # Serialize first so that nothing is requested from the client if it fails.
    payload = json.dumps(logs)
    share_client.get_file_client(file).upload_file(payload)
23
+
24
+ # Chat functions
25
def start_chat(query, history, search_only):
    """Append the user's message to the history and prepare the UI for an answer.

    Args:
        query: Text of the user's message.
        history: Current list of chat messages (not mutated; a new list is returned).
        search_only: When truthy, the tab with id 2 is selected instead of id 1.

    Returns:
        Tuple of: an update disabling the input, an update selecting the tab,
        the extended history, and an empty list.
    """
    history = history + [ChatMessage(role="user", content=query)]
    selected_tab = 2 if search_only else 1
    return (gr.update(interactive=False), gr.update(selected=selected_tab), history, [])
31
+
32
def finish_chat():
    """Return a Gradio update that empties the input and makes it interactive again."""
    reset_input = gr.update(value="", interactive=True)
    return reset_input
34
+
35
def _message_content(message):
    """Return the text of a chat message given either as a dict or as an object with ``.content``."""
    if isinstance(message, dict):
        return message["content"]
    return message.content


def log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id):
    """Upload a JSON record of the interaction, unless GRADIO_ENV is "local".

    Args:
        history: Chat history; entry 1 is logged as the prompt and the last
            entry as the answer. Entries may be dicts or message objects.
        output_query: Value logged under "question".
        sources: Value logged under "sources".
        docs: Documents, serialized with ``serialize_docs`` before logging.
        share_client: Client handed to ``log_on_azure``.
        user_id: Identifier logged (as a string) under "user_id".

    Raises:
        gr.Error: If building or uploading the record fails for any reason.
    """
    try:
        # Log interaction to Azure if not in local environment
        if os.getenv("GRADIO_ENV") != "local":
            timestamp = str(datetime.now().timestamp())
            # History entries are read through one accessor because the list can
            # hold both plain dicts and message objects.
            prompt = _message_content(history[1])
            logs = {
                "user_id": str(user_id),
                "prompt": prompt,
                "query": prompt,
                "question": output_query,
                "sources": sources,
                "docs": serialize_docs(docs),
                "answer": _message_content(history[-1]),
                "time": timestamp,
            }
            log_on_azure(f"{timestamp}.json", logs, share_client)
    except Exception as e:
        print(f"Error logging on Azure Blob Storage: {e}")
        error_msg = f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
        # Chain the original exception so the root cause stays in the traceback.
        raise gr.Error(error_msg) from e
56
+
57
+ # Main chat function
58
async def chat_stream(
    agent : CompiledStateGraph,
    query: str,
    history: list[ChatMessage],
    audience: str,
    sources: list[str],
    reports: list[str],
    relevant_content_sources_selection: list[str],
    search_only: bool,
    share_client,
    user_id: str
) -> tuple[list, str, str, str, list, str]:
    """Process a chat query and stream the response with relevant sources and visualizations.

    This is an async generator: it yields one tuple per event received from
    the agent, then logs the interaction and yields a final tuple.

    Args:
        agent (CompiledStateGraph): Graph whose ``astream_events`` is consumed
        query (str): The user's question
        history (list): Chat message history (updated in place and re-yielded)
        audience (str): Target audience type
        sources (list): Knowledge base sources to search; defaults to
            ["IPCC", "IPBES"] when empty
        reports (list): Specific reports to search within sources
        relevant_content_sources_selection (list): Types of content to retrieve (figures, papers, etc)
        search_only (bool): Whether to only search without generating answer
        share_client: Client passed to ``log_interaction_to_azure``
        user_id (str): Identifier passed to ``log_interaction_to_azure``

    Yields:
        tuple: Contains:
            - history: Updated chat history
            - docs_html: HTML of retrieved documents
            - output_query: Processed query
            - output_language: Detected language
            - related_contents: Related content
            - graphs_html: HTML of relevant graphs

    Raises:
        gr.Error: If processing a streamed event (or the stream itself) fails.
    """
    # Log incoming question
    date_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f">> NEW QUESTION ({date_now}) : {query}")

    audience_prompt = init_audience(audience)
    sources = sources or ["IPCC", "IPBES"]
    reports = reports or []

    # Prepare inputs for agent
    inputs = {
        "user_input": query,
        "audience": audience_prompt,
        "sources_input": sources,
        "relevant_content_sources_selection": relevant_content_sources_selection,
        "search_only": search_only,
        "reports": reports
    }

    # Get streaming events from agent
    result = agent.astream_events(inputs, version="v1")

    # Initialize state variables
    docs = []
    related_contents = []
    docs_html = ""
    output_query = ""
    output_language = ""
    start_streaming = False
    graphs_html = ""
    used_documents = []
    retrieved_contents = []
    answer_message_content = ""

    # Define processing steps
    steps_display = {
        "categorize_intent": ("🔄️ Analyzing user message", True),
        "transform_query": ("🔄️ Thinking step by step to answer the question", True),
        "retrieve_documents": ("🔄️ Searching in the knowledge base", False),
        "retrieve_local_data": ("🔄️ Searching in the knowledge base", False),
    }

    # Bound up-front so the error handler can always print it, even when the
    # stream fails before delivering its first event.
    event = None
    try:
        # Process streaming events
        async for event in result:

            if "langgraph_node" in event["metadata"]:
                node = event["metadata"]["langgraph_node"]

                # Handle document retrieval
                if (event["event"] == "on_chain_end" and
                        event["name"] in ["retrieve_documents", "retrieve_local_data"] and
                        event["data"]["output"] is not None):
                    history, used_documents, retrieved_contents = handle_retrieved_documents(
                        event, history, used_documents, retrieved_contents
                    )

                if event["event"] == "on_chain_end" and event["name"] == "answer_search":
                    docs = event["data"]["input"]["documents"]
                    docs_html = convert_to_docs_to_html(docs)
                    related_contents = event["data"]["input"]["related_contents"]

                # Handle intent categorization
                elif (event["event"] == "on_chain_end" and
                      node == "categorize_intent" and
                      event["name"] == "_write"):
                    intent = event["data"]["output"]["intent"]
                    output_language = event["data"]["output"].get("language", "English")
                    history[-1].content = f"Language identified: {output_language}\nIntent identified: {intent}"

                # Handle processing steps display
                elif event["name"] in steps_display and event["event"] == "on_chain_start":
                    # Look the step up with the key that was just tested for membership.
                    event_description, _ = steps_display[event["name"]]
                    # The last message may be a plain dict or carry no title yet.
                    last_metadata = getattr(history[-1], "metadata", None) or {}
                    if last_metadata.get("title") != event_description:
                        history.append(ChatMessage(
                            role="assistant",
                            content="",
                            metadata={'title': event_description}
                        ))

                # Handle answer streaming
                elif (event["name"] != "transform_query" and
                      event["event"] == "on_chat_model_stream" and
                      node in ["answer_rag", "answer_rag_no_docs", "answer_search", "answer_chitchat"]):
                    history, start_streaming, answer_message_content = stream_answer(
                        history, event, start_streaming, answer_message_content
                    )

                # Handle graph retrieval
                elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
                    graphs_html = handle_retrieved_owid_graphs(event, graphs_html)

                # Handle query transformation
                if event["name"] == "transform_query" and event["event"] == "on_chain_end":
                    if hasattr(history[-1], "content"):
                        sub_questions = [q["question"] + "-> relevant sources : " + str(q["sources"]) for q in event["data"]["output"]["questions_list"]]
                        history[-1].content += "Decompose question into sub-questions:\n\n - " + "\n - ".join(sub_questions)

            yield history, docs_html, output_query, output_language, related_contents, graphs_html

    except Exception as e:
        print(f"Event {event} has failed")
        raise gr.Error(str(e)) from e

    # Call the function to log interaction
    log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id)

    yield history, docs_html, output_query, output_language, related_contents, graphs_html
climateqa/constants.py CHANGED
@@ -1,4 +1,6 @@
1
  POSSIBLE_REPORTS = [
 
 
2
  "IPCC AR6 WGI SPM",
3
  "IPCC AR6 WGI FR",
4
  "IPCC AR6 WGI TS",
 
1
  POSSIBLE_REPORTS = [
2
+ "IPBES IABWFH SPM",
3
+ "IPBES CBL SPM",
4
  "IPCC AR6 WGI SPM",
5
  "IPCC AR6 WGI FR",
6
  "IPCC AR6 WGI TS",
climateqa/engine/chains/answer_rag.py CHANGED
@@ -11,7 +11,7 @@ import time
11
  from ..utils import rename_chain, pass_values
12
 
13
 
14
- DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
15
 
16
  def _combine_documents(
17
  docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, sep="\n\n"
@@ -61,10 +61,11 @@ def make_rag_node(llm,with_docs = True):
61
  rag_chain = make_rag_chain(llm)
62
  else:
63
  rag_chain = make_rag_chain_without_docs(llm)
64
-
65
  async def answer_rag(state,config):
66
  print("---- Answer RAG ----")
67
  start_time = time.time()
 
68
 
69
  answer = await rag_chain.ainvoke(state,config)
70
 
 
11
  from ..utils import rename_chain, pass_values
12
 
13
 
14
+ DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="Source : {source} - Content : {page_content}")
15
 
16
  def _combine_documents(
17
  docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, sep="\n\n"
 
61
  rag_chain = make_rag_chain(llm)
62
  else:
63
  rag_chain = make_rag_chain_without_docs(llm)
64
+
65
  async def answer_rag(state,config):
66
  print("---- Answer RAG ----")
67
  start_time = time.time()
68
+ print("Sources used : " + "\n".join([x.metadata["short_name"] + " - page " + str(x.metadata["page_number"]) for x in state["documents"]]))
69
 
70
  answer = await rag_chain.ainvoke(state,config)
71
 
climateqa/engine/chains/graph_retriever.py CHANGED
@@ -50,7 +50,9 @@ def make_graph_retriever_node(vectorstore, reranker, rerank_by_question=True, k_
50
  print("---- Retrieving graphs ----")
51
 
52
  POSSIBLE_SOURCES = ["IEA", "OWID"]
53
- questions = state["remaining_questions"] if state["remaining_questions"] is not None and state["remaining_questions"]!=[] else [state["query"]]
 
 
54
  # sources_input = state["sources_input"]
55
  sources_input = ["auto"]
56
 
 
50
  print("---- Retrieving graphs ----")
51
 
52
  POSSIBLE_SOURCES = ["IEA", "OWID"]
53
+ # questions = state["remaining_questions"] if state["remaining_questions"] is not None and state["remaining_questions"]!=[] else [state["query"]]
54
+ questions = state["questions_list"] if state["questions_list"] is not None and state["questions_list"]!=[] else [state["query"]]
55
+
56
  # sources_input = state["sources_input"]
57
  sources_input = ["auto"]
58
 
climateqa/engine/chains/prompts.py CHANGED
@@ -36,13 +36,40 @@ You are given a question and extracted passages of the IPCC and/or IPBES reports
36
  """
37
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  answer_prompt_template = """
40
- You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted passages of the IPCC and/or IPBES reports. Provide a clear and structured answer based on the passages provided, the context and the guidelines.
41
 
42
  Guidelines:
43
  - If the passages have useful facts or numbers, use them in your answer.
44
  - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
45
- - Do not use the sentence 'Doc i says ...' to say where information came from.
 
 
 
46
  - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
47
  - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
48
  - If it makes sense, use bullet points and lists to make your answers easier to understand.
@@ -51,6 +78,7 @@ Guidelines:
51
  - Consider by default that the question is about the past century unless it is specified otherwise.
52
  - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
53
 
 
54
  -----------------------
55
  Passages:
56
  {context}
@@ -60,7 +88,6 @@ Question: {query} - Explained to {audience}
60
  Answer in {language} with the passages citations:
61
  """
62
 
63
-
64
  papers_prompt_template = """
65
  You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted abstracts of scientific papers. Provide a clear and structured answer based on the abstracts provided, the context and the guidelines.
66
 
 
36
  """
37
 
38
 
39
+ # answer_prompt_template_old = """
40
+ # You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted passages of reports. Provide a clear and structured answer based on the passages provided, the context and the guidelines.
41
+
42
+ # Guidelines:
43
+ # - If the passages have useful facts or numbers, use them in your answer.
44
+ # - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
45
+ # - Do not use the sentence 'Doc i says ...' to say where information came from.
46
+ # - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
47
+ # - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
48
+ # - If it makes sense, use bullet points and lists to make your answers easier to understand.
49
+ # - You do not need to use every passage. Only use the ones that help answer the question.
50
+ # - If the documents do not have the information needed to answer the question, just say you do not have enough information.
51
+ # - Consider by default that the question is about the past century unless it is specified otherwise.
52
+ # - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
53
+
54
+ # -----------------------
55
+ # Passages:
56
+ # {context}
57
+
58
+ # -----------------------
59
+ # Question: {query} - Explained to {audience}
60
+ # Answer in {language} with the passages citations:
61
+ # """
62
+
63
  answer_prompt_template = """
64
+ You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted passages of reports. Provide a clear and structured answer based on the passages provided, the context and the guidelines.
65
 
66
  Guidelines:
67
  - If the passages have useful facts or numbers, use them in your answer.
68
  - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
69
+ - You will receive passages from different reports, eg IPCC and PPCP, make separate paragraphs and specify the source of the information in your answer, eg "According to IPCC, ...".
70
+ - The different sources are IPCC, IPBES, PPCP (for Plan Climat Air Energie Territorial de Paris), PBDP (for Plan Biodiversité de Paris), Acclimaterra.
71
+ - Do not mention that you are using specific extract documents, but mention only the source information. "According to IPCC, ..." rather than "According to the provided document from IPCC ..."
72
+ - Make a clear distinction between information from IPCC, IPBES, Acclimaterra that are scientific reports and PPCP, PBDP that are strategic reports. Strategic reports should not be taken has verified facts, but as political or strategic decisions.
73
  - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
74
  - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
75
  - If it makes sense, use bullet points and lists to make your answers easier to understand.
 
78
  - Consider by default that the question is about the past century unless it is specified otherwise.
79
  - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
80
 
81
+
82
  -----------------------
83
  Passages:
84
  {context}
 
88
  Answer in {language} with the passages citations:
89
  """
90
 
 
91
  papers_prompt_template = """
92
  You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted abstracts of scientific papers. Provide a clear and structured answer based on the abstracts provided, the context and the guidelines.
93
 
climateqa/engine/chains/query_transformation.py CHANGED
@@ -7,43 +7,7 @@ from langchain.prompts import ChatPromptTemplate
7
  from langchain_core.utils.function_calling import convert_to_openai_function
8
  from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
9
 
10
-
11
- ROUTING_INDEX = {
12
- "Vector":["IPCC","IPBES","IPOS"],
13
- "OpenAlex":["OpenAlex"],
14
- }
15
-
16
- POSSIBLE_SOURCES = [y for values in ROUTING_INDEX.values() for y in values]
17
-
18
- # Prompt from the original paper https://arxiv.org/pdf/2305.14283
19
- # Query Rewriting for Retrieval-Augmented Large Language Models
20
- class QueryDecomposition(BaseModel):
21
- """
22
- Decompose the user query into smaller parts to think step by step to answer this question
23
- Act as a simple planning agent
24
- """
25
-
26
- questions: List[str] = Field(
27
- description="""
28
- Think step by step to answer this question, and provide one or several search engine questions in English for knowledge that you need.
29
- Suppose that the user is looking for information about climate change, energy, biodiversity, nature, and everything we can find the IPCC reports and scientific literature
30
- - If it's already a standalone and explicit question, just return the reformulated question for the search engine
31
- - If you need to decompose the question, output a list of maximum 2 to 3 questions
32
- """
33
- )
34
-
35
-
36
- class Location(BaseModel):
37
- country:str = Field(...,description="The country if directly mentioned or inferred from the location (cities, regions, adresses), ex: France, USA, ...")
38
- location:str = Field(...,description="The specific place if mentioned (cities, regions, addresses), ex: Marseille, New York, Wisconsin, ...")
39
-
40
- class QueryAnalysis(BaseModel):
41
- """
42
- Analyzing the user query to extract topics, sources and date
43
- Also do query expansion to get alternative search queries
44
- Also provide simple keywords to feed a search engine
45
- """
46
-
47
  # keywords: List[str] = Field(
48
  # description="""
49
  # Extract the keywords from the user query to feed a search engine as a list
@@ -68,17 +32,10 @@ class QueryAnalysis(BaseModel):
68
  # This questions should help you get more context and information about the user query
69
  # """
70
  # )
71
-
72
- sources: List[Literal["IPCC", "IPBES", "IPOS"]] = Field( #,"OpenAlex"]] = Field(
73
- ...,
74
- description="""
75
- Given a user question choose which documents would be most relevant for answering their question,
76
- - IPCC is for questions about climate change, energy, impacts, and everything we can find the IPCC reports
77
- - IPBES is for questions about biodiversity and nature
78
- - IPOS is for questions about the ocean and deep sea mining
79
- """,
80
- # - OpenAlex is for any other questions that are not in the previous categories but could be found in the scientific litterature
81
- )
82
  # topics: List[Literal[
83
  # "Climate change",
84
  # "Biodiversity",
@@ -101,7 +58,82 @@ class QueryAnalysis(BaseModel):
101
  # location:Location
102
 
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  def make_query_decomposition_chain(llm):
 
 
 
 
 
 
 
 
105
 
106
  openai_functions = [convert_to_openai_function(QueryDecomposition)]
107
  llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryDecomposition"})
@@ -115,7 +147,8 @@ def make_query_decomposition_chain(llm):
115
  return chain
116
 
117
 
118
- def make_query_rewriter_chain(llm):
 
119
 
120
  openai_functions = [convert_to_openai_function(QueryAnalysis)]
121
  llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryAnalysis"})
@@ -123,7 +156,7 @@ def make_query_rewriter_chain(llm):
123
 
124
 
125
  prompt = ChatPromptTemplate.from_messages([
126
- ("system", "You are a helpful assistant, you will analyze, translate and reformulate the user input message using the function provided"),
127
  ("user", "input: {input}")
128
  ])
129
 
@@ -132,22 +165,63 @@ def make_query_rewriter_chain(llm):
132
  return chain
133
 
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def make_query_transform_node(llm,k_final=15):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  decomposition_chain = make_query_decomposition_chain(llm)
138
- rewriter_chain = make_query_rewriter_chain(llm)
 
139
 
140
  def transform_query(state):
141
  print("---- Transform query ----")
142
 
143
-
144
- if "sources_auto" not in state or state["sources_auto"] is None or state["sources_auto"] is False:
145
- auto_mode = False
146
- else:
147
- auto_mode = True
148
-
149
- sources_input = state.get("sources_input")
150
- if sources_input is None: sources_input = ROUTING_INDEX["Vector"]
151
 
152
  new_state = {}
153
 
@@ -155,24 +229,41 @@ def make_query_transform_node(llm,k_final=15):
155
  decomposition_output = decomposition_chain.invoke({"input":state["query"]})
156
  new_state.update(decomposition_output)
157
 
 
158
  # Query Analysis
159
  questions = []
160
  for question in new_state["questions"]:
161
  question_state = {"question":question}
162
- analysis_output = rewriter_chain.invoke({"input":question})
163
 
164
  # TODO WARNING llm should always return smthg
165
- # The case when the llm does not return any sources
166
- if not analysis_output["sources"] or not all(source in ["IPCC", "IPBS", "IPOS"] for source in analysis_output["sources"]):
167
- analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
 
 
 
 
 
 
 
 
 
 
168
 
169
- question_state.update(analysis_output)
170
- questions.append(question_state)
 
 
 
 
 
 
171
 
172
  # Explode the questions into multiple questions with different sources
173
  new_questions = []
174
  for q in questions:
175
- question,sources = q["question"],q["sources"]
176
 
177
  # If not auto mode we take the configuration
178
  if not auto_mode:
@@ -181,7 +272,7 @@ def make_query_transform_node(llm,k_final=15):
181
  for index,index_sources in ROUTING_INDEX.items():
182
  selected_sources = list(set(sources).intersection(index_sources))
183
  if len(selected_sources) > 0:
184
- new_questions.append({"question":question,"sources":selected_sources,"index":index})
185
 
186
  # # Add the number of questions to search
187
  # k_by_question = k_final // len(new_questions)
@@ -191,10 +282,16 @@ def make_query_transform_node(llm,k_final=15):
191
  # new_state["questions"] = new_questions
192
  # new_state["remaining_questions"] = new_questions
193
 
 
 
 
 
 
194
 
195
  new_state = {
196
- "remaining_questions":new_questions,
197
- "n_questions":len(new_questions),
 
198
  }
199
  return new_state
200
 
 
7
  from langchain_core.utils.function_calling import convert_to_openai_function
8
  from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
9
 
10
+ # OLD QUERY ANALYSIS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # keywords: List[str] = Field(
12
  # description="""
13
  # Extract the keywords from the user query to feed a search engine as a list
 
32
  # This questions should help you get more context and information about the user query
33
  # """
34
  # )
35
+ # - OpenAlex is for any other questions that are not in the previous categories but could be found in the scientific literature
36
+ #
37
+
38
+
 
 
 
 
 
 
 
39
  # topics: List[Literal[
40
  # "Climate change",
41
  # "Biodiversity",
 
58
  # location:Location
59
 
60
 
61
+
62
# Maps each retrieval index name to the source names routed to it.
ROUTING_INDEX = {
    "IPx": ["IPCC", "IPBES", "IPOS"],
    "POC": ["AcclimaTerra", "PCAET", "Biodiv"],
    "OpenAlex": ["OpenAlex"],
}

# Flat list of every source name, in ROUTING_INDEX declaration order.
POSSIBLE_SOURCES = [
    source_name
    for index_sources in ROUTING_INDEX.values()
    for source_name in index_sources
]
69
+
70
+ # Prompt from the original paper https://arxiv.org/pdf/2305.14283
71
+ # Query Rewriting for Retrieval-Augmented Large Language Models
72
+ class QueryDecomposition(BaseModel):
73
+ """
74
+ Decompose the user query into smaller parts to think step by step to answer this question
75
+ Act as a simple planning agent
76
+ """
77
+
78
+ questions: List[str] = Field(
79
+ description="""
80
+ Think step by step to answer this question, and provide one or several search engine questions in the provided language for knowledge that you need.
81
+ Suppose that the user is looking for information about climate change, energy, biodiversity, nature, and everything we can find the IPCC reports and scientific literature
82
+ - If it's already a standalone and explicit question, just return the reformulated question for the search engine
83
+ - If you need to decompose the question, output a list of maximum 2 to 3 questions
84
+ """
85
+ )
86
+
87
+
88
+ class Location(BaseModel):
89
+ country:str = Field(...,description="The country if directly mentioned or inferred from the location (cities, regions, adresses), ex: France, USA, ...")
90
+ location:str = Field(...,description="The specific place if mentioned (cities, regions, addresses), ex: Marseille, New York, Wisconsin, ...")
91
+
92
+ class QueryTranslation(BaseModel):
93
+ """Translate the query into a given language"""
94
+
95
+ question : str = Field(
96
+ description="""
97
+ Translate the questions into the given language
98
+ If the question is alrealdy in the given language, just return the same question
99
+ """,
100
+ )
101
+
102
+
103
+ class QueryAnalysis(BaseModel):
104
+ """
105
+ Analyze the user query to extract the relevant sources
106
+
107
+ Deprecated:
108
+ Analyzing the user query to extract topics, sources and date
109
+ Also do query expansion to get alternative search queries
110
+ Also provide simple keywords to feed a search engine
111
+ """
112
+
113
+ sources: List[Literal["IPCC", "IPBES", "IPOS", "AcclimaTerra", "PCAET","Biodiv"]] = Field( #,"OpenAlex"]] = Field(
114
+ ...,
115
+ description="""
116
+ Given a user question choose which documents would be most relevant for answering their question,
117
+ - IPCC is for questions about climate change, energy, impacts, and everything we can find the IPCC reports
118
+ - IPBES is for questions about biodiversity and nature
119
+ - IPOS is for questions about the ocean and deep sea mining
120
+ - AcclimaTerra is for questions about any specific place in, or close to, the french region "Nouvelle-Aquitaine"
121
+ - PCAET is the Plan Climat Eneregie Territorial for the city of Paris
122
+ - Biodiv is the Biodiversity plan for the city of Paris
123
+ """,
124
+ )
125
+
126
+
127
+
128
  def make_query_decomposition_chain(llm):
129
+ """Chain to decompose a query into smaller parts to think step by step to answer this question
130
+
131
+ Args:
132
+ llm (_type_): _description_
133
+
134
+ Returns:
135
+ _type_: _description_
136
+ """
137
 
138
  openai_functions = [convert_to_openai_function(QueryDecomposition)]
139
  llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryDecomposition"})
 
147
  return chain
148
 
149
 
150
+ def make_query_analysis_chain(llm):
151
+ """Analyze the user query to extract the relevant sources"""
152
 
153
  openai_functions = [convert_to_openai_function(QueryAnalysis)]
154
  llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryAnalysis"})
 
156
 
157
 
158
  prompt = ChatPromptTemplate.from_messages([
159
+ ("system", "You are a helpful assistant, you will analyze the user input message using the function provided"),
160
  ("user", "input: {input}")
161
  ])
162
 
 
165
  return chain
166
 
167
 
168
def make_query_translation_chain(llm):
    """Build a chain that translates a question into a target language.

    The chain takes a mapping with the keys ``input`` (the question) and
    ``language`` (the target language). The LLM is forced to call the
    ``QueryTranslation`` function and the result is parsed with
    ``JsonOutputFunctionsParser``.

    Args:
        llm: Chat model exposing ``bind(functions=..., function_call=...)``.

    Returns:
        The runnable ``prompt | bound llm | JsonOutputFunctionsParser()``.
    """
    translation_function = convert_to_openai_function(QueryTranslation)
    bound_llm = llm.bind(
        functions=[translation_function],
        function_call={"name": "QueryTranslation"},
    )

    system_message = ("system", "You are a helpful assistant, translate the question into {language}")
    user_message = ("user", "input: {input}")
    translation_prompt = ChatPromptTemplate.from_messages([system_message, user_message])

    return translation_prompt | bound_llm | JsonOutputFunctionsParser()
184
+
185
def group_by_sources_types(sources):
    """Group source names by the family of documents they belong to.

    Args:
        sources: Iterable of source names, e.g. ``["IPCC", "PCAET"]``.

    Returns:
        A dict with the key ``"IPx"`` and/or ``"POC"``, each mapped to the
        matching source names, deduplicated and kept in order of first
        appearance in ``sources``. A family with no match is omitted and
        names belonging to neither family are ignored.
    """
    families = {
        "IPx": ("IPCC", "IPBES", "IPOS"),
        "POC": ("AcclimaTerra", "PCAET", "Biodiv"),
    }
    sources = list(sources)  # materialize once so it can be scanned per family

    sources_types = {}
    for family, members in families.items():
        # dict.fromkeys dedups while preserving input order; going through
        # set() made the order depend on string hash randomization.
        matched = list(dict.fromkeys(name for name in sources if name in members))
        if matched:
            sources_types[family] = matched
    return sources_types
194
+
195
+
196
  def make_query_transform_node(llm,k_final=15):
197
+ """
198
+ Creates a query transformation node that processes and transforms a given query state.
199
+ Args:
200
+ llm: The language model to be used for query decomposition and rewriting.
201
+ k_final (int, optional): The final number of questions to be generated. Defaults to 15.
202
+ Returns:
203
+ function: A function that takes a query state and returns a transformed state.
204
+ The returned function performs the following steps:
205
+ 1. Checks if the query should be processed in auto mode based on the state.
206
+ 2. Retrieves the input sources from the state or defaults to a predefined routing index.
207
+ 3. Decomposes the query using the decomposition chain.
208
+ 4. Analyzes each decomposed question using the rewriter chain.
209
+ 5. Ensures that the sources returned by the language model are valid.
210
+ 6. Explodes the questions into multiple questions with different sources based on the mode.
211
+ 7. Constructs a new state with the transformed questions and their respective sources.
212
+ """
213
+
214
 
215
  decomposition_chain = make_query_decomposition_chain(llm)
216
+ query_analysis_chain = make_query_analysis_chain(llm)
217
+ query_translation_chain = make_query_translation_chain(llm)
218
 
219
  def transform_query(state):
220
  print("---- Transform query ----")
221
 
222
+ auto_mode = state.get("sources_auto", True)
223
+ sources_input = state.get("sources_input", ROUTING_INDEX["IPx"])
224
+
 
 
 
 
 
225
 
226
  new_state = {}
227
 
 
229
  decomposition_output = decomposition_chain.invoke({"input":state["query"]})
230
  new_state.update(decomposition_output)
231
 
232
+
233
  # Query Analysis
234
  questions = []
235
  for question in new_state["questions"]:
236
  question_state = {"question":question}
237
+ query_analysis_output = query_analysis_chain.invoke({"input":question})
238
 
239
  # TODO WARNING llm should always return smthg
240
+ # The case when the llm does not return any sources or returns a wrong output
241
+ if not query_analysis_output["sources"] or not all(source in ["IPCC", "IPBS", "IPOS","AcclimaTerra", "PCAET","Biodiv"] for source in query_analysis_output["sources"]):
242
+ query_analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
243
+
244
+ sources_types = group_by_sources_types(query_analysis_output["sources"])
245
+ for source_type,sources in sources_types.items():
246
+ question_state = {
247
+ "question":question,
248
+ "sources":sources,
249
+ "source_type":source_type
250
+ }
251
+
252
+ questions.append(question_state)
253
 
254
+ # Translate question into the document language
255
+ for q in questions:
256
+ if q["source_type"]=="IPx":
257
+ translation_output = query_translation_chain.invoke({"input":q["question"],"language":"English"})
258
+ q["question"] = translation_output["question"]
259
+ elif q["source_type"]=="POC":
260
+ translation_output = query_translation_chain.invoke({"input":q["question"],"language":"French"})
261
+ q["question"] = translation_output["question"]
262
 
263
  # Explode the questions into multiple questions with different sources
264
  new_questions = []
265
  for q in questions:
266
+ question,sources,source_type = q["question"],q["sources"], q["source_type"]
267
 
268
  # If not auto mode we take the configuration
269
  if not auto_mode:
 
272
  for index,index_sources in ROUTING_INDEX.items():
273
  selected_sources = list(set(sources).intersection(index_sources))
274
  if len(selected_sources) > 0:
275
+ new_questions.append({"question":question,"sources":selected_sources,"index":index, "source_type":source_type})
276
 
277
  # # Add the number of questions to search
278
  # k_by_question = k_final // len(new_questions)
 
282
  # new_state["questions"] = new_questions
283
  # new_state["remaining_questions"] = new_questions
284
 
285
+ n_questions = {
286
+ "total":len(new_questions),
287
+ "IPx":len([q for q in new_questions if q["index"] == "IPx"]),
288
+ "POC":len([q for q in new_questions if q["index"] == "POC"]),
289
+ }
290
 
291
  new_state = {
292
+ "questions_list":new_questions,
293
+ "n_questions":n_questions,
294
+ "handled_questions_index":[],
295
  }
296
  return new_state
297
 
climateqa/engine/chains/retrieve_documents.py CHANGED
@@ -7,7 +7,7 @@ from langchain_core.runnables import chain
7
  from langchain_core.runnables import RunnableParallel, RunnablePassthrough
8
  from langchain_core.runnables import RunnableLambda
9
 
10
- from ..reranker import rerank_docs
11
  # from ...knowledge.retriever import ClimateQARetriever
12
  from ...knowledge.openalex import OpenAlexRetriever
13
  from .keywords_extraction import make_keywords_extraction_chain
@@ -15,7 +15,9 @@ from ..utils import log_event
15
  from langchain_core.vectorstores import VectorStore
16
  from typing import List
17
  from langchain_core.documents.base import Document
 
18
 
 
19
 
20
 
21
  def divide_into_parts(target, parts):
@@ -87,7 +89,7 @@ def _get_k_images_by_question(n_questions):
87
  elif n_questions == 2:
88
  return 5
89
  elif n_questions == 3:
90
- return 2
91
  else:
92
  return 1
93
 
@@ -98,11 +100,77 @@ def _add_metadata_and_score(docs: List) -> Document:
98
  doc.page_content = doc.page_content.replace("\r\n"," ")
99
  doc.metadata["similarity_score"] = score
100
  doc.metadata["content"] = doc.page_content
101
- doc.metadata["page_number"] = int(doc.metadata["page_number"]) + 1
 
 
 
102
  # doc.page_content = f"""Doc {i+1} - {doc.metadata['short_name']}: {doc.page_content}"""
103
  docs_with_metadata.append(doc)
104
  return docs_with_metadata
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  async def get_IPCC_relevant_documents(
107
  query: str,
108
  vectorstore:VectorStore,
@@ -164,8 +232,7 @@ async def get_IPCC_relevant_documents(
164
  "chunk_type":"text",
165
  "report_type": { "$nin":["SPM"]},
166
  }
167
- k_full = k_total - len(docs_summaries)
168
- docs_full = vectorstore.similarity_search_with_score(query=query,filter = filters_full,k = k_full)
169
 
170
  if search_figures:
171
  # Images
@@ -188,15 +255,45 @@ async def get_IPCC_relevant_documents(
188
  }
189
 
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
  # The chain callback is not necessary, but it propagates the langchain callbacks to the astream_events logger to display intermediate results
193
  # @chain
194
- async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5, k_images=5):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  """
196
- Retrieve and rerank documents based on the current question in the state.
197
 
198
  Args:
199
  state (dict): The current state containing documents, related content, relevant content sources, remaining questions and n_questions.
 
200
  config (dict): Configuration settings for logging and other purposes.
201
  vectorstore (object): The vector store used to retrieve relevant documents.
202
  reranker (object): The reranker used to rerank the retrieved documents.
@@ -209,95 +306,160 @@ async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_qu
209
  Returns:
210
  dict: The updated state containing the retrieved and reranked documents, related content, and remaining questions.
211
  """
212
- print("---- Retrieve documents ----")
213
-
214
- # Get the documents from the state
215
- if "documents" in state and state["documents"] is not None:
216
- docs = state["documents"]
217
- else:
218
- docs = []
219
- # Get the related_content from the state
220
- if "related_content" in state and state["related_content"] is not None:
221
- related_content = state["related_content"]
222
- else:
223
- related_content = []
224
-
225
- search_figures = "IPCC figures" in state["relevant_content_sources"]
226
- search_only = state["search_only"]
227
-
228
- # Get the current question
229
- current_question = state["remaining_questions"][0]
230
- remaining_questions = state["remaining_questions"][1:]
231
-
232
- k_by_question = k_final // state["n_questions"]
233
- k_summary_by_question = _get_k_summary_by_question(state["n_questions"])
234
- k_images_by_question = _get_k_images_by_question(state["n_questions"])
235
-
236
  sources = current_question["sources"]
237
  question = current_question["question"]
238
  index = current_question["index"]
 
239
 
240
  print(f"Retrieve documents for question: {question}")
241
  await log_event({"question":question,"sources":sources,"index":index},"log_retriever",config)
242
 
 
243
 
244
- if index == "Vector": # always true for now
245
  docs_question_dict = await get_IPCC_relevant_documents(
246
  query = question,
247
  vectorstore=vectorstore,
248
  search_figures = search_figures,
249
  sources = sources,
250
  min_size = 200,
251
- k_summary = k_summary_by_question,
252
  k_total = k_before_reranking,
253
  k_images = k_images_by_question,
254
  threshold = 0.5,
255
  search_only = search_only,
 
256
  )
257
-
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  # Rerank
260
- if reranker is not None:
261
  with suppress_output():
262
- docs_question_summary_reranked = rerank_docs(reranker,docs_question_dict["docs_summaries"],question)
263
- docs_question_fulltext_reranked = rerank_docs(reranker,docs_question_dict["docs_full"],question)
264
- docs_question_images_reranked = rerank_docs(reranker,docs_question_dict["docs_images"],question)
265
- if rerank_by_question:
266
- docs_question_summary_reranked = sorted(docs_question_summary_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
267
- docs_question_fulltext_reranked = sorted(docs_question_fulltext_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
268
- docs_question_images_reranked = sorted(docs_question_images_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
269
  else:
270
- docs_question = docs_question_dict["docs_summaries"] + docs_question_dict["docs_full"]
271
  # Add a default reranking score
272
  for doc in docs_question:
273
  doc.metadata["reranking_score"] = doc.metadata["similarity_score"]
274
 
275
- docs_question = docs_question_summary_reranked + docs_question_fulltext_reranked
276
- docs_question = docs_question[:k_by_question]
277
- images_question = docs_question_images_reranked[:k_images]
278
-
279
  if reranker is not None and rerank_by_question:
280
- docs_question = sorted(docs_question, key=lambda x: x.metadata["reranking_score"], reverse=True)
281
-
282
  # Add sources used in the metadata
283
  docs_question = _add_sources_used_in_metadata(docs_question,sources,question,index)
284
  images_question = _add_sources_used_in_metadata(images_question,sources,question,index)
285
 
286
- # Add to the list of docs
287
- docs.extend(docs_question)
288
- related_content.extend(images_question)
289
- new_state = {"documents":docs, "related_contents": related_content,"remaining_questions":remaining_questions}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  return new_state
 
 
291
 
 
 
 
292
 
 
293
 
294
- def make_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
295
- @chain
296
- async def retrieve_docs(state, config):
297
- state = await retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_question, k_final, k_before_reranking, k_summary)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  return state
299
 
300
- return retrieve_docs
301
 
302
 
303
 
 
7
  from langchain_core.runnables import RunnableParallel, RunnablePassthrough
8
  from langchain_core.runnables import RunnableLambda
9
 
10
+ from ..reranker import rerank_docs, rerank_and_sort_docs
11
  # from ...knowledge.retriever import ClimateQARetriever
12
  from ...knowledge.openalex import OpenAlexRetriever
13
  from .keywords_extraction import make_keywords_extraction_chain
 
15
  from langchain_core.vectorstores import VectorStore
16
  from typing import List
17
  from langchain_core.documents.base import Document
18
+ import asyncio
19
 
20
+ from typing import Any, Dict, List, Tuple
21
 
22
 
23
  def divide_into_parts(target, parts):
 
89
  elif n_questions == 2:
90
  return 5
91
  elif n_questions == 3:
92
+ return 3
93
  else:
94
  return 1
95
 
 
100
  doc.page_content = doc.page_content.replace("\r\n"," ")
101
  doc.metadata["similarity_score"] = score
102
  doc.metadata["content"] = doc.page_content
103
+ if doc.metadata["page_number"] != "N/A":
104
+ doc.metadata["page_number"] = int(doc.metadata["page_number"]) + 1
105
+ else:
106
+ doc.metadata["page_number"] = 1
107
  # doc.page_content = f"""Doc {i+1} - {doc.metadata['short_name']}: {doc.page_content}"""
108
  docs_with_metadata.append(doc)
109
  return docs_with_metadata
110
 
111
def remove_duplicates_chunks(docs):
    """Drop chunks whose text is an exact duplicate of a better-scored one.

    Args:
        docs: Sequence of ``(document, score)`` pairs; each document exposes
            a ``page_content`` attribute.

    Returns:
        The pairs sorted by descending score, keeping only the first
        (highest-scored) pair for each distinct ``page_content``. Only exact
        text matches are treated as duplicates.
    """
    ranked = sorted(docs, key=lambda pair: pair[1], reverse=True)

    # dicts keep insertion order and setdefault never overwrites, so the
    # first pair seen for a given text is the one that survives.
    best_by_content = {}
    for pair in ranked:
        best_by_content.setdefault(pair[0].page_content, pair)
    return list(best_by_content.values())
121
+
122
async def get_POC_relevant_documents(
    query: str,
    vectorstore: VectorStore,
    sources: list = ["Acclimaterra", "PCAET", "Plan Biodiversite"],
    search_figures: bool = False,
    search_only: bool = False,
    k_documents: int = 10,
    threshold: float = 0.6,
    k_images: int = 5,
    reports: list = [],
    min_size: int = 200,
):
    """Fetch text chunks (and optionally images) for a query from the POC store.

    Args:
        query: Search query sent to the vector store.
        vectorstore: Store queried through ``similarity_search_with_score``.
        sources: Accepted but currently unused (source selection is a TODO).
        search_figures: When true, image chunks are searched as well.
        search_only: Accepted but currently unused.
        k_documents: Number of text chunks requested from the store.
        threshold: Text chunks are kept only if their score is strictly
            greater than this value.
        k_images: Number of image chunks requested from the store.
        reports: Accepted but currently unused (source selection is a TODO).
        min_size: Text documents are kept only if ``page_content`` is
            strictly longer than this many characters.

    Returns:
        ``{"docs_question": [...], "docs_images": [...]}``; ``docs_images``
        is empty unless ``search_figures`` is true.
    """
    # TODO add source selection
    # if len(reports) > 0:
    #     filters["short_name"] = {"$in":reports}
    # else:
    #     filters["source"] = { "$in": sources}
    base_filters = {}

    # TODO complete with a "report_type" filter to choose the right
    # documents / chapters according to the analysis of the question.
    scored_chunks = vectorstore.similarity_search_with_score(
        query=query,
        filter={**base_filters, "chunk_type": "text"},
        k=k_documents,
    )
    relevant_chunks = [
        pair
        for pair in remove_duplicates_chunks(scored_chunks)
        if pair[1] > threshold
    ]

    scored_images = []
    if search_figures:
        scored_images = vectorstore.similarity_search_with_score(
            query=query,
            filter={**base_filters, "chunk_type": "image"},
            k=k_images,
        )

    text_docs = _add_metadata_and_score(relevant_chunks)
    image_docs = _add_metadata_and_score(scored_images)

    return {
        "docs_question": [doc for doc in text_docs if len(doc.page_content) > min_size],
        "docs_images": image_docs,
    }
172
+
173
+
174
  async def get_IPCC_relevant_documents(
175
  query: str,
176
  vectorstore:VectorStore,
 
232
  "chunk_type":"text",
233
  "report_type": { "$nin":["SPM"]},
234
  }
235
+ docs_full = vectorstore.similarity_search_with_score(query=query,filter = filters_full,k = k_total)
 
236
 
237
  if search_figures:
238
  # Images
 
255
  }
256
 
257
 
258
+
259
def concatenate_documents(index, source_type, docs_question_dict, k_by_question, k_summary_by_question, k_images_by_question):
    """Trim the retrieved documents and images to the per-question budgets.

    Args:
        index: Unused; kept so existing callers do not break.
        source_type: ``"IPx"`` or ``"POC"``; selects which keys of
            ``docs_question_dict`` are read.
        docs_question_dict: For ``"IPx"``: ``docs_summaries``, ``docs_full``
            and ``docs_images``. For ``"POC"``: ``docs_question`` and
            ``docs_images``.
        k_by_question: Document budget for the question.
        k_summary_by_question: Maximum number of summary documents (IPx
            only); they are placed in front of the full-text documents.
        k_images_by_question: Maximum number of images.

    Returns:
        Tuple ``(docs_question, images_question)``.

    Raises:
        ValueError: If ``source_type`` is neither ``"IPx"`` nor ``"POC"``.
    """
    if source_type == "IPx":
        summaries = docs_question_dict["docs_summaries"][:k_summary_by_question]
        # Clamp at zero: with k_by_question < k_summary_by_question the stop
        # would be negative and the slice would return almost every
        # full-text document instead of none.
        n_full = max(k_by_question - k_summary_by_question, 0)
        docs_question = summaries + docs_question_dict["docs_full"][:n_full]
    elif source_type == "POC":
        docs_question = docs_question_dict["docs_question"][:k_by_question]
    else:
        raise ValueError(f'source_type should be either "IPx" or "POC", got {source_type!r}')

    images_question = docs_question_dict["docs_images"][:k_images_by_question]

    return docs_question, images_question
272
+
273
 
274
  # The chain callback is not necessary, but it propagates the langchain callbacks to the astream_events logger to display intermediate results
275
  # @chain
276
+ async def retrieve_documents(
277
+ current_question: Dict[str, Any],
278
+ config: Dict[str, Any],
279
+ source_type: str,
280
+ vectorstore: VectorStore,
281
+ reranker: Any,
282
+ search_figures: bool = False,
283
+ search_only: bool = False,
284
+ reports: list = [],
285
+ rerank_by_question: bool = True,
286
+ k_images_by_question: int = 5,
287
+ k_before_reranking: int = 100,
288
+ k_by_question: int = 5,
289
+ k_summary_by_question: int = 3
290
+ ) -> Tuple[List[Document], List[Document]]:
291
  """
292
+ Unpack the first question of the remaining questions, and retrieve and rerank corresponding documents, based on the question and selected_sources
293
 
294
  Args:
295
  state (dict): The current state containing documents, related content, relevant content sources, remaining questions and n_questions.
296
+ current_question (dict): The current question being processed.
297
  config (dict): Configuration settings for logging and other purposes.
298
  vectorstore (object): The vector store used to retrieve relevant documents.
299
  reranker (object): The reranker used to rerank the retrieved documents.
 
306
  Returns:
307
  dict: The updated state containing the retrieved and reranked documents, related content, and remaining questions.
308
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  sources = current_question["sources"]
310
  question = current_question["question"]
311
  index = current_question["index"]
312
+ source_type = current_question["source_type"]
313
 
314
  print(f"Retrieve documents for question: {question}")
315
  await log_event({"question":question,"sources":sources,"index":index},"log_retriever",config)
316
 
317
+ print(f"""---- Retrieve documents from {current_question["source_type"]}----""")
318
 
319
+ if source_type == "IPx":
320
  docs_question_dict = await get_IPCC_relevant_documents(
321
  query = question,
322
  vectorstore=vectorstore,
323
  search_figures = search_figures,
324
  sources = sources,
325
  min_size = 200,
326
+ k_summary = k_before_reranking-1,
327
  k_total = k_before_reranking,
328
  k_images = k_images_by_question,
329
  threshold = 0.5,
330
  search_only = search_only,
331
+ reports = reports,
332
  )
 
333
 
334
+ if source_type == "POC":
335
+ docs_question_dict = await get_POC_relevant_documents(
336
+ query = question,
337
+ vectorstore=vectorstore,
338
+ search_figures = search_figures,
339
+ sources = sources,
340
+ threshold = 0.5,
341
+ search_only = search_only,
342
+ reports = reports,
343
+ min_size= 200,
344
+ k_documents= k_before_reranking,
345
+ k_images= k_by_question
346
+ )
347
+
348
  # Rerank
349
+ if reranker is not None and rerank_by_question:
350
  with suppress_output():
351
+ for key in docs_question_dict.keys():
352
+ docs_question_dict[key] = rerank_and_sort_docs(reranker,docs_question_dict[key],question)
 
 
 
 
 
353
  else:
 
354
  # Add a default reranking score
355
  for doc in docs_question:
356
  doc.metadata["reranking_score"] = doc.metadata["similarity_score"]
357
 
358
+ # Keep the right number of documents
359
+ docs_question, images_question = concatenate_documents(index, source_type, docs_question_dict, k_by_question, k_summary_by_question, k_images_by_question)
360
+
361
+ # Rerank the documents to put the most relevant in front
362
  if reranker is not None and rerank_by_question:
363
+ docs_question = rerank_and_sort_docs(reranker, docs_question, question)
364
+
365
  # Add sources used in the metadata
366
  docs_question = _add_sources_used_in_metadata(docs_question,sources,question,index)
367
  images_question = _add_sources_used_in_metadata(images_question,sources,question,index)
368
 
369
+ return docs_question, images_question
370
+
371
+
372
async def retrieve_documents_for_all_questions(state, config, source_type, to_handle_questions_index, vectorstore, reranker, rerank_by_question=True, k_final=15, k_before_reranking=100):
    """Retrieve documents for a subset of the questions held in ``state``.

    One ``retrieve_documents`` coroutine is created per selected question
    and all of them are awaited together with ``asyncio.gather``.

    Args:
        state: Graph state. Reads ``questions_list``, ``n_questions["total"]``,
            ``relevant_content_sources_selection``, ``search_only`` and
            ``reports``.
        config: Forwarded to ``retrieve_documents``.
        source_type: Forwarded to ``retrieve_documents``.
        to_handle_questions_index: Positions in ``state["questions_list"]``
            of the questions to process.
        vectorstore: Forwarded to ``retrieve_documents``.
        reranker: Forwarded to ``retrieve_documents``.
        rerank_by_question: Forwarded to ``retrieve_documents``.
        k_final: Total document budget, divided by the total number of
            questions to obtain the per-question budget.
        k_before_reranking: Forwarded to ``retrieve_documents``.

    Returns:
        A dict with ``documents``, ``related_contents`` (images) and
        ``handled_questions_index``.
    """
    # TODO split the questions by source type in the question state, and add
    # conditions on the number of questions handled per source type.
    new_state = {"documents": [], "related_contents": [], "handled_questions_index": to_handle_questions_index}

    questions = [
        question
        for i, question in enumerate(state["questions_list"])
        if i in to_handle_questions_index
    ]
    if not questions:
        # Nothing to retrieve; this also avoids dividing by a zero question
        # count below.
        return new_state

    search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
    search_only = state["search_only"]
    reports = state["reports"]

    n_total = state["n_questions"]["total"]
    k_by_question = k_final // n_total
    k_summary_by_question = _get_k_summary_by_question(n_total)
    k_images_by_question = _get_k_images_by_question(n_total)

    # k_before_reranking comes from the caller; it used to be silently
    # overwritten with a hard-coded 100 here.
    tasks = [
        retrieve_documents(
            current_question=question,
            config=config,
            source_type=source_type,
            vectorstore=vectorstore,
            reranker=reranker,
            search_figures=search_figures,
            search_only=search_only,
            reports=reports,
            rerank_by_question=rerank_by_question,
            k_images_by_question=k_images_by_question,
            k_before_reranking=k_before_reranking,
            k_by_question=k_by_question,
            k_summary_by_question=k_summary_by_question,
        )
        for question in questions
    ]
    results = await asyncio.gather(*tasks)

    # Combine the per-question results, preserving question order.
    for docs_question, images_question in results:
        new_state["documents"].extend(docs_question)
        new_state["related_contents"].extend(images_question)
    return new_state
415
+
416
def make_IPx_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
    """Create the graph node that retrieves documents for "IPx" questions.

    Args:
        vectorstore: Vector store forwarded to the retrieval helper.
        reranker: Reranker forwarded to the retrieval helper.
        llm: Unused by this node.
        rerank_by_question: Forwarded to the retrieval helper.
        k_final: Forwarded to the retrieval helper.
        k_before_reranking: Forwarded to the retrieval helper.
        k_summary: Unused by this node.

    Returns:
        An async function ``(state, config)`` returning the result of
        ``retrieve_documents_for_all_questions`` for the questions whose
        ``source_type`` is ``"IPx"``.
    """

    async def retrieve_IPx_docs(state, config):
        # Positions of the questions routed to the IPx sources.
        ipx_positions = []
        for position, item in enumerate(state["questions_list"]):
            if item["source_type"] == "IPx":
                ipx_positions.append(position)

        return await retrieve_documents_for_all_questions(
            state=state,
            config=config,
            source_type="IPx",
            to_handle_questions_index=ipx_positions,
            vectorstore=vectorstore,
            reranker=reranker,
            rerank_by_question=rerank_by_question,
            k_final=k_final,
            k_before_reranking=k_before_reranking,
        )

    return retrieve_IPx_docs
438
+
439
+
440
def make_POC_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
    """Create the graph node that retrieves documents for "POC" questions.

    Args:
        vectorstore: Vector store forwarded to the retrieval helper.
        reranker: Reranker forwarded to the retrieval helper.
        llm: Unused by this node.
        rerank_by_question: Forwarded to the retrieval helper.
        k_final: Forwarded to the retrieval helper.
        k_before_reranking: Forwarded to the retrieval helper.
        k_summary: Unused by this node.

    Returns:
        An async function ``(state, config)``. It returns ``{}`` when
        ``"POC region"`` is not in
        ``state["relevant_content_sources_selection"]``; otherwise it returns
        the result of ``retrieve_documents_for_all_questions`` for the
        questions whose ``source_type`` is ``"POC"``.
    """

    async def retrieve_POC_docs_node(state, config):
        poc_selected = "POC region" in state["relevant_content_sources_selection"]
        if poc_selected:
            # Positions of the questions routed to the POC sources.
            poc_positions = []
            for position, item in enumerate(state["questions_list"]):
                if item["source_type"] == "POC":
                    poc_positions.append(position)

            return await retrieve_documents_for_all_questions(
                state=state,
                config=config,
                source_type="POC",
                to_handle_questions_index=poc_positions,
                vectorstore=vectorstore,
                reranker=reranker,
                rerank_by_question=rerank_by_question,
                k_final=k_final,
                k_before_reranking=k_before_reranking,
            )
        return {}

    return retrieve_POC_docs_node
463
 
464
 
465
 
climateqa/engine/chains/retrieve_papers.py CHANGED
@@ -32,8 +32,8 @@ def generate_keywords(query):
32
  return keywords
33
 
34
 
35
- async def find_papers(query,after, relevant_content_sources, reranker= reranker):
36
- if "OpenAlex" in relevant_content_sources:
37
  summary = ""
38
  keywords = generate_keywords(query)
39
  df_works = oa.search(keywords,after = after)
 
32
  return keywords
33
 
34
 
35
+ async def find_papers(query,after, relevant_content_sources_selection, reranker= reranker):
36
+ if "Papers (OpenAlex)" in relevant_content_sources_selection:
37
  summary = ""
38
  keywords = generate_keywords(query)
39
  df_works = oa.search(keywords,after = after)
climateqa/engine/graph.py CHANGED
@@ -9,6 +9,9 @@ from langchain_core.runnables.graph import CurveStyle, MermaidDrawMethod
9
  from typing_extensions import TypedDict
10
  from typing import List, Dict
11
 
 
 
 
12
  from IPython.display import display, HTML, Image
13
 
14
  from .chains.answer_chitchat import make_chitchat_node
@@ -16,7 +19,7 @@ from .chains.answer_ai_impact import make_ai_impact_node
16
  from .chains.query_transformation import make_query_transform_node
17
  from .chains.translation import make_translation_node
18
  from .chains.intent_categorization import make_intent_categorization_node
19
- from .chains.retrieve_documents import make_retriever_node
20
  from .chains.answer_rag import make_rag_node
21
  from .chains.graph_retriever import make_graph_retriever_node
22
  from .chains.chitchat_categorization import make_chitchat_intent_categorization_node
@@ -31,25 +34,30 @@ class GraphState(TypedDict):
31
  intent : str
32
  search_graphs_chitchat : bool
33
  query: str
34
- remaining_questions : List[dict]
 
35
  n_questions : int
36
  answer: str
37
  audience: str = "experts"
38
  sources_input: List[str] = ["IPCC","IPBES"]
39
- relevant_content_sources: List[str] = ["IPCC figures"]
40
  sources_auto: bool = True
41
  min_year: int = 1960
42
  max_year: int = None
43
- documents: List[Document]
44
- related_contents : Dict[str,Document]
45
  recommended_content : List[Document]
46
  search_only : bool = False
 
 
 
 
47
 
48
  def search(state): #TODO
49
- return state
50
 
51
  def answer_search(state):#TODO
52
- return state
53
 
54
  def route_intent(state):
55
  intent = state["intent"]
@@ -59,7 +67,7 @@ def route_intent(state):
59
  # return "answer_ai_impact"
60
  else:
61
  # Search route
62
- return "search"
63
 
64
  def chitchat_route_intent(state):
65
  intent = state["search_graphs_chitchat"]
@@ -72,27 +80,74 @@ def route_translation(state):
72
  if state["language"].lower() == "english":
73
  return "transform_query"
74
  else:
75
- return "translate_query"
 
 
76
 
77
  def route_based_on_relevant_docs(state,threshold_docs=0.2):
78
  docs = [x for x in state["documents"] if x.metadata["reranking_score"] > threshold_docs]
 
79
  if len(docs) > 0:
80
  return "answer_rag"
81
  else:
82
  return "answer_rag_no_docs"
83
 
84
- def route_retrieve_documents(state):
85
- if state["search_only"] :
86
- return END
87
- elif len(state["remaining_questions"]) > 0:
 
 
 
 
88
  return "retrieve_documents"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  else:
90
- return "answer_search"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  def make_id_dict(values):
93
  return {k:k for k in values}
94
 
95
- def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, threshold_docs=0.2):
96
 
97
  workflow = StateGraph(GraphState)
98
 
@@ -102,8 +157,9 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
102
  translate_query = make_translation_node(llm)
103
  answer_chitchat = make_chitchat_node(llm)
104
  answer_ai_impact = make_ai_impact_node(llm)
105
- retrieve_documents = make_retriever_node(vectorstore_ipcc, reranker, llm)
106
  retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
 
107
  answer_rag = make_rag_node(llm, with_docs=True)
108
  answer_rag_no_docs = make_rag_node(llm, with_docs=False)
109
  chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
@@ -111,13 +167,14 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
111
  # Define the nodes
112
  # workflow.add_node("set_defaults", set_defaults)
113
  workflow.add_node("categorize_intent", categorize_intent)
114
- workflow.add_node("search", search)
115
  workflow.add_node("answer_search", answer_search)
116
  workflow.add_node("transform_query", transform_query)
117
  workflow.add_node("translate_query", translate_query)
118
  workflow.add_node("answer_chitchat", answer_chitchat)
119
  workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
120
  workflow.add_node("retrieve_graphs", retrieve_graphs)
 
121
  workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
122
  workflow.add_node("retrieve_documents", retrieve_documents)
123
  workflow.add_node("answer_rag", answer_rag)
@@ -130,7 +187,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
130
  workflow.add_conditional_edges(
131
  "categorize_intent",
132
  route_intent,
133
- make_id_dict(["answer_chitchat","search"])
134
  )
135
 
136
  workflow.add_conditional_edges(
@@ -140,15 +197,96 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
140
  )
141
 
142
  workflow.add_conditional_edges(
143
- "search",
144
  route_translation,
145
  make_id_dict(["translate_query","transform_query"])
146
  )
 
 
 
 
 
 
147
  workflow.add_conditional_edges(
148
- "retrieve_documents",
149
- # lambda state : "retrieve_documents" if len(state["remaining_questions"]) > 0 else "answer_search",
150
  route_retrieve_documents,
151
- make_id_dict([END,"retrieve_documents","answer_search"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  )
153
 
154
  workflow.add_conditional_edges(
@@ -158,13 +296,15 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
158
  )
159
  workflow.add_conditional_edges(
160
  "transform_query",
161
- lambda state : "retrieve_graphs" if "OurWorldInData" in state["relevant_content_sources"] else END,
162
  make_id_dict(["retrieve_graphs", END])
163
  )
164
 
165
  # Define the edges
166
  workflow.add_edge("translate_query", "transform_query")
167
- workflow.add_edge("transform_query", "retrieve_documents")
 
 
168
 
169
  workflow.add_edge("retrieve_graphs", END)
170
  workflow.add_edge("answer_rag", END)
@@ -172,6 +312,8 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
172
  workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
173
  workflow.add_edge("retrieve_graphs_chitchat", END)
174
 
 
 
175
 
176
  # Compile
177
  app = workflow.compile()
 
9
  from typing_extensions import TypedDict
10
  from typing import List, Dict
11
 
12
+ import operator
13
+ from typing import Annotated
14
+
15
  from IPython.display import display, HTML, Image
16
 
17
  from .chains.answer_chitchat import make_chitchat_node
 
19
  from .chains.query_transformation import make_query_transform_node
20
  from .chains.translation import make_translation_node
21
  from .chains.intent_categorization import make_intent_categorization_node
22
+ from .chains.retrieve_documents import make_IPx_retriever_node, make_POC_retriever_node
23
  from .chains.answer_rag import make_rag_node
24
  from .chains.graph_retriever import make_graph_retriever_node
25
  from .chains.chitchat_categorization import make_chitchat_intent_categorization_node
 
34
  intent : str
35
  search_graphs_chitchat : bool
36
  query: str
37
+ questions_list : List[dict]
38
+ handled_questions_index : Annotated[list[int], operator.add]
39
  n_questions : int
40
  answer: str
41
  audience: str = "experts"
42
  sources_input: List[str] = ["IPCC","IPBES"]
43
+ relevant_content_sources_selection: List[str] = ["Figures (IPCC/IPBES)"]
44
  sources_auto: bool = True
45
  min_year: int = 1960
46
  max_year: int = None
47
+ documents: Annotated[List[Document], operator.add]
48
+ related_contents : Annotated[List[Document], operator.add]
49
  recommended_content : List[Document]
50
  search_only : bool = False
51
+ reports : List[str] = []
52
+
53
def dummy(state):
    """Pass-through graph node: ignores ``state`` and produces no update."""
    return None
55
 
56
def search(state): #TODO
    """Placeholder node: ignores ``state`` and produces no update."""
    return None
58
 
59
def answer_search(state):#TODO
    """Placeholder node: ignores ``state`` and produces no update."""
    return None
61
 
62
  def route_intent(state):
63
  intent = state["intent"]
 
67
  # return "answer_ai_impact"
68
  else:
69
  # Search route
70
+ return "answer_climate"
71
 
72
  def chitchat_route_intent(state):
73
  intent = state["search_graphs_chitchat"]
 
80
  if state["language"].lower() == "english":
81
  return "transform_query"
82
  else:
83
+ return "transform_query"
84
+ # return "translate_query" #TODO : add translation
85
+
86
 
87
def route_based_on_relevant_docs(state,threshold_docs=0.2):
    """Pick the answering node depending on whether any document is relevant.

    A document counts as relevant when its ``metadata["reranking_score"]`` is
    strictly greater than ``threshold_docs``.

    Args:
        state: graph state; ``state["documents"]`` is read.
        threshold_docs: exclusive lower bound on the reranking score.

    Returns:
        "answer_rag" if at least one document is relevant, otherwise
        "answer_rag_no_docs".
    """
    # Every document is inspected (no short-circuit), as in a full filter pass.
    relevant_count = sum(
        1 for doc in state["documents"]
        if doc.metadata["reranking_score"] > threshold_docs
    )
    route = "answer_rag" if relevant_count > 0 else "answer_rag_no_docs"
    print("Route : ", [route])
    return route
94
 
95
def route_continue_retrieve_documents(state):
    """Decide whether IPx retrieval must continue.

    Args:
        state: graph state; reads ``questions_list`` and, when at least one
            "IPx" question exists, ``handled_questions_index``.

    Returns:
        "end_retrieve_IPx_documents" when every "IPx" question index is already
        in ``handled_questions_index``, otherwise "retrieve_documents".
    """
    ipx_positions = [
        position
        for position, question in enumerate(state["questions_list"])
        if question["source_type"] == "IPx"
    ]
    all_handled = all(
        position in state["handled_questions_index"] for position in ipx_positions
    )
    return "end_retrieve_IPx_documents" if all_handled else "retrieve_documents"
104
+
105
+
106
+ # if state["n_questions"]["IPx"] == len(state["handled_questions_index"]) and state["search_only"] :
107
+ # return END
108
+ # elif state["n_questions"]["IPx"] == len(state["handled_questions_index"]):
109
+ # return "answer_search"
110
+ # else :
111
+ # return "retrieve_documents"
112
+
113
def route_continue_retrieve_local_documents(state):
    """Decide whether local ("POC") retrieval must continue.

    Args:
        state: graph state; reads ``questions_list``, ``handled_questions_index``
            (only when a "POC" question exists) and, if some "POC" question is
            still unhandled, ``relevant_content_sources_selection``.

    Returns:
        "end_retrieve_local_documents" when all "POC" questions are handled or
        "POC region" is not selected, otherwise "retrieve_local_data".
    """
    poc_positions = [
        position
        for position, question in enumerate(state["questions_list"])
        if question["source_type"] == "POC"
    ]
    all_handled = all(
        position in state["handled_questions_index"] for position in poc_positions
    )
    if all_handled:
        return "end_retrieve_local_documents"
    if "POC region" not in state["relevant_content_sources_selection"]:
        return "end_retrieve_local_documents"
    return "retrieve_local_data"
122
+
123
+ # if state["n_questions"]["POC"] == len(state["handled_questions_index"]) and state["search_only"] :
124
+ # return END
125
+ # elif state["n_questions"]["POC"] == len(state["handled_questions_index"]):
126
+ # return "answer_search"
127
+ # else :
128
+ # return "retrieve_local_data"
129
+
130
+ # if len(state["remaining_questions"]) == 0 and state["search_only"] :
131
+ # return END
132
+ # elif len(state["remaining_questions"]) > 0:
133
+ # return "retrieve_documents"
134
+ # else:
135
+ # return "answer_search"
136
+
137
def route_retrieve_documents(state):
    """Select the extra retrieval nodes to run after query transformation.

    Args:
        state: graph state; ``relevant_content_sources_selection`` is read.

    Returns:
        ``["retrieve_graphs"]`` when "Graphs (OurWorldInData)" is selected,
        otherwise ``END``.
    """
    if "Graphs (OurWorldInData)" in state["relevant_content_sources_selection"]:
        return ["retrieve_graphs"]
    # Nothing optional was requested.
    return END
146
 
147
def make_id_dict(values):
    """Return a dict mapping each element of ``values`` to itself."""
    identity = {}
    for value in values:
        identity[value] = value
    return identity
149
 
150
+ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_region, reranker, threshold_docs=0.2):
151
 
152
  workflow = StateGraph(GraphState)
153
 
 
157
  translate_query = make_translation_node(llm)
158
  answer_chitchat = make_chitchat_node(llm)
159
  answer_ai_impact = make_ai_impact_node(llm)
160
+ retrieve_documents = make_IPx_retriever_node(vectorstore_ipcc, reranker, llm)
161
  retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
162
+ # retrieve_local_data = make_POC_retriever_node(vectorstore_region, reranker, llm)
163
  answer_rag = make_rag_node(llm, with_docs=True)
164
  answer_rag_no_docs = make_rag_node(llm, with_docs=False)
165
  chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
 
167
  # Define the nodes
168
  # workflow.add_node("set_defaults", set_defaults)
169
  workflow.add_node("categorize_intent", categorize_intent)
170
+ workflow.add_node("answer_climate", dummy)
171
  workflow.add_node("answer_search", answer_search)
172
  workflow.add_node("transform_query", transform_query)
173
  workflow.add_node("translate_query", translate_query)
174
  workflow.add_node("answer_chitchat", answer_chitchat)
175
  workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
176
  workflow.add_node("retrieve_graphs", retrieve_graphs)
177
+ # workflow.add_node("retrieve_local_data", retrieve_local_data)
178
  workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
179
  workflow.add_node("retrieve_documents", retrieve_documents)
180
  workflow.add_node("answer_rag", answer_rag)
 
187
  workflow.add_conditional_edges(
188
  "categorize_intent",
189
  route_intent,
190
+ make_id_dict(["answer_chitchat","answer_climate"])
191
  )
192
 
193
  workflow.add_conditional_edges(
 
197
  )
198
 
199
  workflow.add_conditional_edges(
200
+ "answer_climate",
201
  route_translation,
202
  make_id_dict(["translate_query","transform_query"])
203
  )
204
+
205
+ workflow.add_conditional_edges(
206
+ "answer_search",
207
+ lambda x : route_based_on_relevant_docs(x,threshold_docs=threshold_docs),
208
+ make_id_dict(["answer_rag","answer_rag_no_docs"])
209
+ )
210
  workflow.add_conditional_edges(
211
+ "transform_query",
 
212
  route_retrieve_documents,
213
+ make_id_dict(["retrieve_graphs", END])
214
+ )
215
+
216
+ # Define the edges
217
+ workflow.add_edge("translate_query", "transform_query")
218
+ workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
219
+ # workflow.add_edge("transform_query", "retrieve_local_data")
220
+ # workflow.add_edge("transform_query", END) # TODO remove
221
+
222
+ workflow.add_edge("retrieve_graphs", END)
223
+ workflow.add_edge("answer_rag", END)
224
+ workflow.add_edge("answer_rag_no_docs", END)
225
+ workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
226
+ workflow.add_edge("retrieve_graphs_chitchat", END)
227
+
228
+ # workflow.add_edge("retrieve_local_data", "answer_search")
229
+ workflow.add_edge("retrieve_documents", "answer_search")
230
+
231
+ # Compile
232
+ app = workflow.compile()
233
+ return app
234
+
235
+ def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_region, reranker, threshold_docs=0.2):
236
+
237
+ workflow = StateGraph(GraphState)
238
+
239
+ # Define the node functions
240
+ categorize_intent = make_intent_categorization_node(llm)
241
+ transform_query = make_query_transform_node(llm)
242
+ translate_query = make_translation_node(llm)
243
+ answer_chitchat = make_chitchat_node(llm)
244
+ answer_ai_impact = make_ai_impact_node(llm)
245
+ retrieve_documents = make_IPx_retriever_node(vectorstore_ipcc, reranker, llm)
246
+ retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
247
+ retrieve_local_data = make_POC_retriever_node(vectorstore_region, reranker, llm)
248
+ answer_rag = make_rag_node(llm, with_docs=True)
249
+ answer_rag_no_docs = make_rag_node(llm, with_docs=False)
250
+ chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
251
+
252
+ # Define the nodes
253
+ # workflow.add_node("set_defaults", set_defaults)
254
+ workflow.add_node("categorize_intent", categorize_intent)
255
+ workflow.add_node("answer_climate", dummy)
256
+ workflow.add_node("answer_search", answer_search)
257
+ # workflow.add_node("end_retrieve_local_documents", dummy)
258
+ # workflow.add_node("end_retrieve_IPx_documents", dummy)
259
+ workflow.add_node("transform_query", transform_query)
260
+ workflow.add_node("translate_query", translate_query)
261
+ workflow.add_node("answer_chitchat", answer_chitchat)
262
+ workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
263
+ workflow.add_node("retrieve_graphs", retrieve_graphs)
264
+ workflow.add_node("retrieve_local_data", retrieve_local_data)
265
+ workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
266
+ workflow.add_node("retrieve_documents", retrieve_documents)
267
+ workflow.add_node("answer_rag", answer_rag)
268
+ workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
269
+
270
+ # Entry point
271
+ workflow.set_entry_point("categorize_intent")
272
+
273
+ # CONDITIONAL EDGES
274
+ workflow.add_conditional_edges(
275
+ "categorize_intent",
276
+ route_intent,
277
+ make_id_dict(["answer_chitchat","answer_climate"])
278
+ )
279
+
280
+ workflow.add_conditional_edges(
281
+ "chitchat_categorize_intent",
282
+ chitchat_route_intent,
283
+ make_id_dict(["retrieve_graphs_chitchat", END])
284
+ )
285
+
286
+ workflow.add_conditional_edges(
287
+ "answer_climate",
288
+ route_translation,
289
+ make_id_dict(["translate_query","transform_query"])
290
  )
291
 
292
  workflow.add_conditional_edges(
 
296
  )
297
  workflow.add_conditional_edges(
298
  "transform_query",
299
+ route_retrieve_documents,
300
  make_id_dict(["retrieve_graphs", END])
301
  )
302
 
303
  # Define the edges
304
  workflow.add_edge("translate_query", "transform_query")
305
+ workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
306
+ workflow.add_edge("transform_query", "retrieve_local_data")
307
+ # workflow.add_edge("transform_query", END) # TODO remove
308
 
309
  workflow.add_edge("retrieve_graphs", END)
310
  workflow.add_edge("answer_rag", END)
 
312
  workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
313
  workflow.add_edge("retrieve_graphs_chitchat", END)
314
 
315
+ workflow.add_edge("retrieve_local_data", "answer_search")
316
+ workflow.add_edge("retrieve_documents", "answer_search")
317
 
318
  # Compile
319
  app = workflow.compile()
climateqa/engine/reranker.py CHANGED
@@ -47,4 +47,9 @@ def rerank_docs(reranker,docs,query):
47
  doc.metadata["reranking_score"] = result.score
48
  doc.metadata["query_used_for_retrieval"] = query
49
  docs_reranked.append(doc)
 
 
 
 
 
50
  return docs_reranked
 
47
  doc.metadata["reranking_score"] = result.score
48
  doc.metadata["query_used_for_retrieval"] = query
49
  docs_reranked.append(doc)
50
+ return docs_reranked
51
+
52
def rerank_and_sort_docs(reranker, docs, query):
    """Rerank ``docs`` against ``query`` and return them best-first.

    Delegates scoring to ``rerank_docs`` and then orders the result by
    ``metadata["reranking_score"]`` in descending order (stable for ties).
    """
    ranked = list(rerank_docs(reranker, docs, query))
    ranked.sort(key=lambda doc: doc.metadata["reranking_score"], reverse=True)
    return ranked
climateqa/{event_handler.py → handle_stream_events.py} RENAMED
@@ -15,7 +15,14 @@ def init_audience(audience :str) -> str:
15
  audience_prompt = audience_prompts["experts"]
16
  return audience_prompt
17
 
18
- def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage], used_documents : list[str]) -> tuple[str, list[ChatMessage], list[str]]:
 
 
 
 
 
 
 
19
  """
20
  Handles the retrieved documents and returns the HTML representation of the documents
21
 
@@ -27,26 +34,22 @@ def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage],
27
  Returns:
28
  tuple[str, list[ChatMessage], list[str]]: The updated HTML representation of the documents, the updated message history and the updated list of used documents
29
  """
 
 
 
30
  try:
31
- docs = event["data"]["output"]["documents"]
32
- docs_html = []
33
- textual_docs = [d for d in docs if d.metadata["chunk_type"] == "text"]
34
- for i, d in enumerate(textual_docs, 1):
35
- if d.metadata["chunk_type"] == "text":
36
- docs_html.append(make_html_source(d, i))
37
 
38
  used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
39
  if used_documents!=[]:
40
  history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
41
-
42
- docs_html = "".join(docs_html)
43
 
44
- related_contents = event["data"]["output"]["related_contents"]
45
-
46
  except Exception as e:
47
  print(f"Error getting documents: {e}")
48
  print(event)
49
- return docs, docs_html, history, used_documents, related_contents
50
 
51
  def stream_answer(history: list[ChatMessage], event : StreamEvent, start_streaming : bool, answer_message_content : str)-> tuple[list[ChatMessage], bool, str]:
52
  """
 
15
  audience_prompt = audience_prompts["experts"]
16
  return audience_prompt
17
 
18
def convert_to_docs_to_html(docs: list[dict]) -> str:
    """Render the textual documents of ``docs`` as one HTML string.

    Each element is read through ``.metadata["chunk_type"]``; only elements
    whose chunk type is "text" are rendered with ``make_html_source``. The
    index passed to ``make_html_source`` is the 1-based position in ``docs``
    (non-text elements still consume a position).

    Returns:
        The concatenated HTML fragments, or "" when nothing is rendered.
    """
    fragments = [
        make_html_source(doc, position)
        for position, doc in enumerate(docs, start=1)
        if doc.metadata["chunk_type"] == "text"
    ]
    return "".join(fragments)
24
+
25
+ def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage], used_documents : list[str],related_content:list[str]) -> tuple[str, list[ChatMessage], list[str]]:
26
  """
27
  Handles the retrieved documents and returns the HTML representation of the documents
28
 
 
34
  Returns:
35
  tuple[str, list[ChatMessage], list[str]]: The updated HTML representation of the documents, the updated message history and the updated list of used documents
36
  """
37
+ if "documents" not in event["data"]["output"] or event["data"]["output"]["documents"] == []:
38
+ return history, used_documents, related_content
39
+
40
  try:
41
+ docs = event["data"]["output"]["documents"]
 
 
 
 
 
42
 
43
  used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
44
  if used_documents!=[]:
45
  history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
46
+
47
+ #TODO do the same for related contents
48
 
 
 
49
  except Exception as e:
50
  print(f"Error getting documents: {e}")
51
  print(event)
52
+ return history, used_documents, related_content
53
 
54
  def stream_answer(history: list[ChatMessage], event : StreamEvent, start_streaming : bool, answer_message_content : str)-> tuple[list[ChatMessage], bool, str]:
55
  """
front/deprecated.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Functions to toggle visibility
3
def toggle_summary_visibility():
    """Flip the module-level ``summary_visible`` flag and return the matching update."""
    global summary_visible
    summary_visible = False if summary_visible else True
    return gr.update(visible=summary_visible)
7
+
8
def toggle_relevant_visibility():
    """Flip the module-level ``relevant_visible`` flag and return the matching update."""
    global relevant_visible
    relevant_visible = False if relevant_visible else True
    return gr.update(visible=relevant_visible)
12
+
13
def change_completion_status(current_state):
    """Return ``1 - current_state`` (toggles a 0/1 completion flag)."""
    return 1 - current_state
16
+
17
+
18
+
19
def vote(data: gr.LikeData):
    """Print the liked value, or the whole event object when it was not liked."""
    to_print = data.value if data.liked else data
    print(to_print)
24
+
25
def save_graph(saved_graphs_state, embedding, category):
    """Record ``embedding`` under ``category`` in ``saved_graphs_state``.

    The state mapping is mutated in place; an embedding already stored for
    the category is not added twice.

    Returns:
        tuple: the updated state and a ``gr.Button`` labelled "Graph Saved".
    """
    print(f"\nCategory:\n{saved_graphs_state}\n")
    if category not in saved_graphs_state:
        saved_graphs_state[category] = []
    bucket = saved_graphs_state[category]
    already_saved = embedding in bucket
    if not already_saved:
        bucket.append(embedding)
    return saved_graphs_state, gr.Button("Graph Saved")
32
+
33
+
34
+ # Function to save feedback
35
def save_feedback(feed: str, user_id):
    """Log a piece of user feedback on Azure.

    Feedback of length 0 or 1 is ignored and ``None`` is returned.

    Args:
        feed: feedback text.
        user_id: identifier used as the prefix of the log file name.

    Returns:
        A confirmation message when the feedback was logged, else ``None``.
    """
    if len(feed) <= 1:
        return None

    timestamp = str(datetime.now().timestamp())
    log_name = user_id + timestamp + ".json"
    payload = {
        "user_id": user_id,
        "feedback": feed,
        "time": timestamp,
    }
    log_on_azure(log_name, payload, share_client)
    return "Feedback submitted, thank you!"
46
+
front/event_listeners.py ADDED
File without changes
front/tabs/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from .tab_config import create_config_modal
2
+ from .tab_examples import create_examples_tab
3
+ from .tab_papers import create_papers_tab
4
+ from .tab_figures import create_figures_tab
5
+ from .chat_interface import create_chat_interface
6
+ from .tab_about import create_about_tab
front/tabs/chat_interface.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio.components import ChatMessage
3
+
4
+ # Initialize prompt and system template
5
+ init_prompt = """
6
+ Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
7
+
8
+ ❓ How to use
9
+ - **Language**: You can ask me your questions in any language.
10
+ - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
11
+ - **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
12
+ - **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.
13
+
14
+ ⚠️ Limitations
15
+ *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
16
+
17
+ 🛈 Information
18
+ Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
19
+
20
+ What do you want to learn ?
21
+ """
22
+
23
+
24
+
25
+ # UI Layout Components
26
def create_chat_interface():
    """Build the chat column: the chatbot, the input textbox and the config button.

    The chatbot is pre-filled with ``init_prompt`` as a single assistant
    message. The textbox and the config button are created inside the
    "input-message" row.

    Returns:
        tuple: ``(chatbot, textbox, config_button)``.
    """
    welcome_message = ChatMessage(role="assistant", content=init_prompt)
    chat_window = gr.Chatbot(
        value=[welcome_message],
        type="messages",
        show_copy_button=True,
        show_label=False,
        elem_id="chatbot",
        layout="panel",
        avatar_images=(None, "https://i.ibb.co/YNyd5W2/logo4.png"),
        max_height="80vh",
        height="100vh"
    )

    with gr.Row(elem_id="input-message"):
        message_box = gr.Textbox(
            placeholder="Ask me anything here!",
            show_label=False,
            scale=12,
            lines=1,
            interactive=True,
            elem_id="input-textbox"
        )
        settings_button = gr.Button("", elem_id="config-button")

    return chat_window, message_box, settings_button
53
+
54
+
55
+
front/tabs/main_tab.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from .chat_interface import create_chat_interface
3
+ from .tab_examples import create_examples_tab
4
+ from .tab_papers import create_papers_tab
5
+ from .tab_figures import create_figures_tab
6
+ from .chat_interface import create_chat_interface
7
+
8
def cqa_tab(tab_name):
    """Build one ClimateQ&A tab (chat on the left, content panels on the right).

    Args:
        tab_name: label of the ``gr.Tab`` that wraps the whole layout.

    Returns:
        dict: the created components, keyed by name. "dropdown_samples" and
        "samples" are ``None`` when ``create_examples_tab`` returns only the
        hidden examples textbox.
    """
    # State variables
    current_graphs = gr.State([])
    with gr.Tab(tab_name):
        with gr.Row(elem_id="chatbot-row"):
            # Left column - Chat interface
            with gr.Column(scale=2):
                chatbot, textbox, config_button = create_chat_interface()

            # Right column - Content panels
            with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
                with gr.Tabs(elem_id="right_panel_tab") as tabs:
                    # Examples tab
                    with gr.TabItem("Examples", elem_id="tab-examples", id=0):
                        # create_examples_tab returns a single component (the
                        # hidden textbox); unpacking it directly into three
                        # names raises. Accept both return shapes.
                        examples_components = create_examples_tab()
                        if isinstance(examples_components, (tuple, list)):
                            examples_hidden, dropdown_samples, samples = examples_components
                        else:
                            examples_hidden = examples_components
                            dropdown_samples = None
                            samples = None

                    # Sources tab
                    with gr.Tab("Sources", elem_id="tab-sources", id=1) as tab_sources:
                        sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")

                    # Recommended content tab
                    with gr.Tab("Recommended content", elem_id="tab-recommended_content", id=2) as tab_recommended_content:
                        with gr.Tabs(elem_id="group-subtabs") as tabs_recommended_content:
                            # Figures subtab
                            with gr.Tab("Figures", elem_id="tab-figures", id=3) as tab_figures:
                                sources_raw, new_figures, used_figures, gallery_component, figures_cards, figure_modal = create_figures_tab()

                            # Papers subtab
                            with gr.Tab("Papers", elem_id="tab-citations", id=4) as tab_papers:
                                papers_summary, papers_html, citations_network, papers_modal = create_papers_tab()

                            # Graphs subtab
                            with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
                                graphs_container = gr.HTML(
                                    "<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",
                                    elem_id="graphs-container"
                                )
    return {
        "chatbot": chatbot,
        "textbox": textbox,
        "tabs": tabs,
        "sources_raw": sources_raw,
        "new_figures": new_figures,
        "current_graphs": current_graphs,
        "examples_hidden": examples_hidden,
        "dropdown_samples": dropdown_samples,
        "samples": samples,
        "sources_textbox": sources_textbox,
        "figures_cards": figures_cards,
        "gallery_component": gallery_component,
        "config_button": config_button,
        "papers_html": papers_html,
        "citations_network": citations_network,
        "papers_summary": papers_summary,
        "tab_recommended_content": tab_recommended_content,
        "tab_sources": tab_sources,
        "tab_figures": tab_figures,
        "tab_graphs": tab_graphs,
        "tab_papers": tab_papers,
        "graph_container": graphs_container
    }
front/tabs/tab_about.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ # Citation information
4
+ CITATION_LABEL = "BibTeX citation for ClimateQ&A"
5
+ CITATION_TEXT = r"""@misc{climateqa,
6
+ author={Théo Alves Da Costa, Timothée Bohe},
7
+ title={ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
8
+ year={2024},
9
+ howpublished= {\url{https://climateqa.com}},
10
+ }
11
+ @software{climateqa,
12
+ author = {Théo Alves Da Costa, Timothée Bohe},
13
+ publisher = {ClimateQ&A},
14
+ title = {ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
15
+ }
16
+ """
17
+
18
def create_about_tab():
    """Render the "About" tab: project links and a collapsible BibTeX citation."""
    about_markdown = """
                    ### More info
                    - See more info at [https://climateqa.com](https://climateqa.com/docs/intro/)
                    - Feedbacks on this [form](https://forms.office.com/e/1Yzgxm6jbp)

                    ### Citation
                    """
    # One textbox row per line of the citation text.
    citation_line_count = len(CITATION_TEXT.split('\n'))

    with gr.Tab("About", elem_classes="max-height other-tabs"):
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown(about_markdown)
                with gr.Accordion(CITATION_LABEL, elem_id="citation", open=False):
                    gr.Textbox(
                        value=CITATION_TEXT,
                        label="",
                        interactive=False,
                        show_copy_button=True,
                        lines=citation_line_count,
                    )
front/tabs/tab_config.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_modal import Modal
3
+ from climateqa.constants import POSSIBLE_REPORTS
4
+ from typing import TypedDict
5
+
6
class ConfigPanel(TypedDict):
    """Typed description of the component dict returned by ``create_config_modal``."""

    # Whether the configuration modal is considered open.
    config_open: gr.State
    # The modal container itself.
    config_modal: Modal
    # Report families to search in.
    dropdown_sources: gr.CheckboxGroup
    # Optional explicit list of reports.
    dropdown_reports: gr.Dropdown
    # Databases searched for relevant content.
    dropdown_external_sources: gr.CheckboxGroup
    # Toggle: search for recommended content without chatting.
    search_only: gr.Checkbox
    # Target audience of the answer.
    dropdown_audience: gr.Dropdown
    # Publication-date slider.
    after: gr.Slider
    # Read-only box showing the query used for retrieval.
    output_query: gr.Textbox
    # Read-only box showing the language.
    output_language: gr.Textbox
    # Button that validates and closes the modal (returned by create_config_modal).
    close_config_modal_button: gr.Button
17
+
18
+
19
def create_config_modal():
    """Build the (initially hidden) configuration modal.

    Creates the source, report, external-database, audience and date controls,
    two hidden read-only textboxes, and a "Validate and Close" button. The
    date slider is shown only while "Papers (OpenAlex)" is ticked in the
    external-sources group.

    Returns:
        dict: every created component keyed by name, including
        "close_config_modal_button".
    """
    config_open = gr.State(value=True)

    def _date_slider_visibility(selected_sources):
        # The publication-date filter only applies to OpenAlex papers.
        return gr.update(visible="Papers (OpenAlex)" in selected_sources)

    with Modal(visible=False, elem_id="modal-config") as config_modal:
        gr.Markdown("Reminders: You can talk in any language, ClimateQ&A is multi-lingual!")

        dropdown_sources = gr.CheckboxGroup(
            choices=["IPCC", "IPBES", "IPOS"],
            label="Select source (by default search in all sources)",
            value=["IPCC"],
            interactive=True
        )

        dropdown_reports = gr.Dropdown(
            choices=POSSIBLE_REPORTS,
            label="Or select specific reports",
            multiselect=True,
            value=None,
            interactive=True
        )

        dropdown_external_sources = gr.CheckboxGroup(
            choices=["Figures (IPCC/IPBES)", "Papers (OpenAlex)", "Graphs (OurWorldInData)","POC region"],
            label="Select database to search for relevant content",
            value=["Figures (IPCC/IPBES)","POC region"],
            interactive=True
        )

        search_only = gr.Checkbox(
            label="Search only for recommended content without chating",
            value=False,
            interactive=True,
            elem_id="checkbox-chat"
        )

        dropdown_audience = gr.Dropdown(
            choices=["Children", "General public", "Experts"],
            label="Select audience",
            value="Experts",
            interactive=True
        )

        after = gr.Slider(
            minimum=1950,
            maximum=2023,
            step=1,
            value=1960,
            label="Publication date",
            show_label=True,
            interactive=True,
            elem_id="date-papers",
            visible=False
        )

        output_query = gr.Textbox(
            label="Query used for retrieval",
            show_label=True,
            elem_id="reformulated-query",
            lines=2,
            interactive=False,
            visible=False
        )

        output_language = gr.Textbox(
            label="Language",
            show_label=True,
            elem_id="language",
            lines=1,
            interactive=False,
            visible=False
        )

        dropdown_external_sources.change(
            _date_slider_visibility,
            inputs=[dropdown_external_sources],
            outputs=[after]
        )

        close_config_modal_button = gr.Button("Validate and Close", elem_id="close-config-modal")

    return dict(
        config_open=config_open,
        config_modal=config_modal,
        dropdown_sources=dropdown_sources,
        dropdown_reports=dropdown_reports,
        dropdown_external_sources=dropdown_external_sources,
        search_only=search_only,
        dropdown_audience=dropdown_audience,
        after=after,
        output_query=output_query,
        output_language=output_language,
        close_config_modal_button=close_config_modal_button,
    )
front/tabs/tab_examples.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from climateqa.sample_questions import QUESTIONS
3
+
4
+
5
def create_examples_tab():
    """Build the sample-questions tab and return its hidden textbox.

    A dropdown lists every category found in ``QUESTIONS``. For each category
    a ``gr.Row`` wrapping a ``gr.Examples`` widget is created; only the row of
    the first category starts visible. Selecting another category in the
    dropdown makes its row visible and hides all the others.

    Returns:
        The hidden ``gr.Textbox`` (``elem_id="examples-hidden"``) that every
        ``gr.Examples`` widget uses as its input component.

    Raises:
        IndexError: if ``QUESTIONS`` has no category (there is no first key
            to preselect).
    """
    # Snapshot the category names once so the dropdown choices, the rows and
    # the index lookup in the callback all share the same ordering.
    categories = list(QUESTIONS)

    examples_hidden = gr.Textbox(visible=False, elem_id="examples-hidden")
    dropdown_samples = gr.Dropdown(
        choices=categories,
        value=categories[0],
        interactive=True,
        label="Select a category of sample questions",
        elem_id="dropdown-samples",
    )

    # One row per category, in the same order as ``categories``.
    samples = []
    for i, key in enumerate(categories):
        with gr.Row(visible=(i == 0)) as group_examples:
            gr.Examples(
                examples=QUESTIONS[key],
                inputs=[examples_hidden],
                examples_per_page=8,
                run_on_click=False,
                elem_id=f"examples{i}",
                api_name=f"examples{i}",
            )
        samples.append(group_examples)

    def change_sample_questions(key):
        """Return one visibility update per row, showing only ``key``'s row."""
        selected = categories.index(key)
        return [gr.update(visible=(i == selected)) for i, _ in enumerate(samples)]

    # Event listener: swap the visible examples row when the category changes.
    dropdown_samples.change(change_sample_questions, dropdown_samples, samples)

    return examples_hidden
front/tabs/tab_figures.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_modal import Modal
3
+
4
+
5
def create_figures_tab():
    """Build the figures tab: states, full-size gallery modal, button, cards.

    Components are created in this order: three ``gr.State`` holders, a
    hidden ``Modal`` containing a ``gr.Gallery``, a button whose click makes
    that modal visible, and an HTML block for the figure cards.

    Returns:
        A 6-tuple ``(sources_raw, new_figures, used_figures,
        gallery_component, figures_cards, figure_modal)``.
    """
    # Session states; the two list states start empty, the first has no
    # initial value. What callers store in them is not visible from here.
    raw_sources_state = gr.State()
    new_figures_state = gr.State([])
    used_figures_state = gr.State([])

    # Hidden modal holding the gallery.
    with Modal(visible=False, elem_id="modal_figure_galery") as gallery_modal:
        gallery = gr.Gallery(
            height="80vh",
            elem_id="gallery-component",
            object_fit='scale-down',
        )

    # Button that reveals the modal; it has no inputs and only updates the modal.
    open_gallery_button = gr.Button(
        "Show figures in full size",
        interactive=True,
        elem_id="show-figures",
    )
    open_gallery_button.click(
        fn=lambda: Modal(visible=True),
        inputs=None,
        outputs=gallery_modal,
    )

    cards_html = gr.HTML(elem_id="sources-figures", show_label=False)

    return (
        raw_sources_state,
        new_figures_state,
        used_figures_state,
        gallery,
        cards_html,
        gallery_modal,
    )
31
+
front/tabs/tab_papers.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_modal import Modal
3
+
4
+
5
def create_papers_tab():
    """Build the papers tab: summary, relevant papers, citations network.

    Components are created in this order: a collapsed accordion holding the
    papers summary (Markdown), a collapsed accordion holding the relevant
    papers (HTML), a button, and a hidden ``Modal`` containing the citations
    network HTML. Clicking the button makes the modal visible.

    Returns:
        A 4-tuple ``(papers_summary, papers_html, citations_network,
        papers_modal)``.
    """
    # The accordions are only layout containers; nothing references them
    # afterwards, so they are not bound to names.
    with gr.Accordion(
        visible=True,
        elem_id="papers-summary-popup",
        label="See summary of relevant papers",
        open=False,
    ):
        papers_summary = gr.Markdown("", visible=True, elem_id="papers-summary")

    with gr.Accordion(
        visible=True,
        elem_id="papers-relevant-popup",
        label="See relevant papers",
        open=False,
    ):
        papers_html = gr.HTML(show_label=False, elem_id="papers-textbox")

    btn_citations_network = gr.Button("Explore papers citations network")
    # Hidden modal that hosts the citations network graph.
    with Modal(visible=False) as papers_modal:
        citations_network = gr.HTML(
            "<h3>Citations Network Graph</h3>",
            visible=True,
            elem_id="papers-citations-network",
        )
    # The click has no inputs; it only flips the modal to visible.
    btn_citations_network.click(
        lambda: Modal(visible=True),
        None,
        papers_modal,
    )

    return papers_summary, papers_html, citations_network, papers_modal
36
+
front/tabs/tab_recommended_content.py ADDED
File without changes
front/utils.py CHANGED
@@ -39,23 +39,33 @@ def parse_output_llm_with_sources(output:str)->str:
39
  content_parts = "".join(parts)
40
  return content_parts
41
 
42
- def process_figures(docs:list)->tuple:
43
- gallery=[]
44
- used_figures =[]
 
 
 
 
45
  figures = '<div class="figures-container"><p></p> </div>'
 
 
 
 
 
 
 
46
  docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
47
- for i, doc in enumerate(docs_figures):
48
- if doc.metadata["chunk_type"] == "image":
49
- if doc.metadata["figure_code"] != "N/A":
50
- title = f"{doc.metadata['figure_code']} - {doc.metadata['short_name']}"
51
- else:
52
- title = f"{doc.metadata['short_name']}"
53
 
54
 
55
- if title not in used_figures:
56
- used_figures.append(title)
 
 
57
  try:
58
- key = f"Image {i+1}"
59
 
60
  image_path = doc.metadata["image_path"].split("documents/")[1]
61
  img = get_image_from_azure_blob_storage(image_path)
@@ -68,12 +78,12 @@ def process_figures(docs:list)->tuple:
68
 
69
  img_str = base64.b64encode(buffered.getvalue()).decode()
70
 
71
- figures = figures + make_html_figure_sources(doc, i, img_str)
72
  gallery.append(img)
73
  except Exception as e:
74
- print(f"Skipped adding image {i} because of {e}")
75
 
76
- return figures, gallery
77
 
78
 
79
  def generate_html_graphs(graphs:list)->str:
 
39
  content_parts = "".join(parts)
40
  return content_parts
41
 
42
+
43
+
44
+ def process_figures(docs:list, new_figures:list)->tuple:
45
+ if new_figures == []:
46
+ return docs, "", []
47
+ docs = docs + new_figures
48
+
49
  figures = '<div class="figures-container"><p></p> </div>'
50
+ gallery = []
51
+ used_figures = []
52
+
53
+ if docs == []:
54
+ return docs, figures, gallery
55
+
56
+
57
  docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
58
+ for i_doc, doc in enumerate(docs_figures):
59
+ if doc.metadata["chunk_type"] == "image":
60
+ path = doc.metadata["image_path"]
 
 
 
61
 
62
 
63
+ if path not in used_figures:
64
+ used_figures.append(path)
65
+ figure_number = len(used_figures)
66
+
67
  try:
68
+ key = f"Image {figure_number}"
69
 
70
  image_path = doc.metadata["image_path"].split("documents/")[1]
71
  img = get_image_from_azure_blob_storage(image_path)
 
78
 
79
  img_str = base64.b64encode(buffered.getvalue()).decode()
80
 
81
+ figures = figures + make_html_figure_sources(doc, figure_number, img_str)
82
  gallery.append(img)
83
  except Exception as e:
84
+ print(f"Skipped adding image {figure_number} because of {e}")
85
 
86
+ return docs, figures, gallery
87
 
88
 
89
  def generate_html_graphs(graphs:list)->str:
requirements.txt CHANGED
@@ -4,7 +4,7 @@ azure-storage-blob
4
  python-dotenv==1.0.0
5
  langchain==0.2.1
6
  langchain_openai==0.1.7
7
- langgraph==0.0.55
8
  pinecone-client==4.1.0
9
  sentence-transformers==2.6.0
10
  huggingface-hub
 
4
  python-dotenv==1.0.0
5
  langchain==0.2.1
6
  langchain_openai==0.1.7
7
+ langgraph==0.2.70
8
  pinecone-client==4.1.0
9
  sentence-transformers==2.6.0
10
  huggingface-hub
sandbox/20241104 - CQA - StepByStep CQA.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
style.css CHANGED
@@ -1,89 +1,127 @@
1
-
2
  /* :root {
3
  --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
4
- } */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- #tab-recommended_content{
7
- padding-top: 0px;
8
- padding-left : 0px;
9
- padding-right: 0px;
10
  }
 
11
  #group-subtabs {
12
  /* display: block; */
13
- width: 100%; /* Ensures the parent uses the full width */
14
  position : sticky;
15
  }
16
 
17
- #group-subtabs .tab-container {
18
- display: flex;
19
- text-align: center;
20
- width: 100%; /* Ensures the tabs span the full width */
21
- }
22
 
23
- #group-subtabs .tab-container button {
24
- flex: 1; /* Makes each button take equal width */
25
  }
26
 
 
 
 
27
 
28
- #papers-summary-popup button span{
29
- /* make label of accordio in bold, center, and bigger */
30
- font-size: 16px;
31
  font-weight: bold;
32
- text-align: center;
 
33
 
 
 
34
  }
35
 
36
- #papers-relevant-popup span{
37
- /* make label of accordio in bold, center, and bigger */
38
- font-size: 16px;
39
- font-weight: bold;
40
- text-align: center;
41
  }
42
 
 
 
 
 
 
 
 
 
 
 
43
 
 
 
 
44
 
45
- #tab-citations .button{
46
- padding: 12px 16px;
47
- font-size: 16px;
48
  font-weight: bold;
49
- cursor: pointer;
50
- border: none;
51
- outline: none;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  text-align: left;
53
- transition: background-color 0.3s ease;
54
  }
55
 
 
 
 
56
 
57
- .gradio-container {
58
- width: 100%!important;
59
- max-width: 100% !important;
60
  }
61
 
62
- /* fix for huggingface infinite growth*/
63
- main.flex.flex-1.flex-col {
64
- max-height: 95vh !important;
65
  }
66
 
67
- button#show-figures{
68
- /* Base styles */
69
- background-color: #f5f5f5;
70
- border: 1px solid #e0e0e0;
71
- border-radius: 4px;
72
- color: #333333;
73
- cursor: pointer;
74
- width: 100%;
75
- text-align: center;
76
  }
77
 
78
- .avatar-container.svelte-1x5p6hu:not(.thumbnail-item) img {
79
- width: 100%;
80
- height: 100%;
81
- object-fit: cover;
82
- border-radius: 50%;
83
- padding: 0px;
84
- margin: 0px;
85
  }
86
 
 
87
  .warning-box {
88
  background-color: #fff3cd;
89
  border: 1px solid #ffeeba;
@@ -93,32 +131,20 @@ button#show-figures{
93
  color: #856404;
94
  display: inline-block;
95
  margin-bottom: 15px;
96
- }
97
-
98
 
99
  .tip-box {
100
  background-color: #f0f9ff;
101
  border: 1px solid #80d4fa;
102
  border-radius: 4px;
103
- margin-top:20px;
104
  padding: 15px 20px;
105
  font-size: 14px;
106
  display: inline-block;
107
- margin-bottom: 15px;
108
  width: auto;
109
- color:black !important;
110
- }
111
-
112
- body.dark .warning-box * {
113
- color:black !important;
114
- }
115
-
116
-
117
- body.dark .tip-box * {
118
- color:black !important;
119
  }
120
 
121
-
122
  .tip-box-title {
123
  font-weight: bold;
124
  font-size: 14px;
@@ -130,116 +156,128 @@ body.dark .tip-box * {
130
  margin-right: 5px;
131
  }
132
 
133
- .gr-box {border-color: #d6c37c}
134
-
135
- #hidden-message{
136
- display:none;
 
 
 
 
 
 
 
137
  }
138
 
139
- .message{
140
- font-size:14px !important;
141
-
142
- }
143
- .card-content img {
144
- display: block;
145
- margin: auto;
146
- max-width: 100%; /* Ensures the image is responsive */
147
- height: auto;
148
  }
149
 
150
- a {
151
- text-decoration: none;
152
- color: inherit;
 
 
 
 
153
  }
154
 
155
- .doc-ref sup{
156
- color:#dc2626!important;
157
- /* margin-right:1px; */
158
  }
159
 
 
 
 
 
160
 
161
- .card {
162
- background-color: white;
163
- border-radius: 10px;
164
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
165
- overflow: hidden;
166
- display: flex;
167
- flex-direction: column;
168
- margin:20px;
169
  }
170
 
171
- .card-content {
172
- padding: 20px;
 
 
173
  }
174
 
175
- .card-content h2 {
176
- font-size: 14px !important;
177
- font-weight: bold;
178
- margin-bottom: 10px;
179
- margin-top:0px !important;
180
- color:#dc2626!important;;
181
  }
182
 
183
- .card-content p {
184
- font-size: 12px;
185
- margin-bottom: 0;
 
 
186
  }
187
 
188
- .card-footer {
189
- background-color: #f4f4f4;
190
- font-size: 10px;
191
  padding: 10px;
 
 
192
  display: flex;
193
- justify-content: space-between;
194
  align-items: center;
 
 
195
  }
196
 
197
- .card-footer span {
198
- flex-grow: 1;
199
- text-align: left;
200
- color: #999 !important;
 
 
 
 
 
201
  }
202
 
203
- .pdf-link {
204
- display: inline-flex;
205
- align-items: center;
206
- margin-left: auto;
207
- text-decoration: none!important;
208
- font-size: 14px;
 
 
 
209
  }
210
 
211
-
212
-
213
- .message.user{
214
- /* background-color:#7494b0 !important; */
215
- border:none;
216
- /* color:white!important; */
217
  }
218
 
219
- .message.bot{
220
- /* background-color:#f2f2f7 !important; */
221
- border:none;
222
  }
223
 
224
-
225
- label.selected{
226
- background: #93c5fd !important;
227
  }
228
 
229
- #submit-button{
230
- padding:0px !important;
231
  }
232
 
233
- #modal-config .block.modal-block.padded {
234
- padding-top: 25px;
235
- height: 100vh;
236
-
237
- }
238
- #modal-config .modal-container{
239
- margin: 0px;
240
- padding: 0px;
241
  }
242
- /* Modal styles */
 
243
  #modal-config {
244
  position: fixed;
245
  top: 0;
@@ -252,28 +290,23 @@ label.selected{
252
  padding: 15px;
253
  transform: none;
254
  }
255
- #modal-config .close{
256
- display: none;
 
 
257
  }
258
 
259
- /* Push main content to the right when modal is open */
260
- /* .modal ~ * {
261
- margin-left: 300px;
262
- transition: margin-left 0.3s ease;
263
- } */
264
 
265
- #modal-config .modal .wrap ul{
266
- position:static;
267
- top: 100%;
268
- left: 0;
269
- /* min-height: 100px; */
270
- height: 100%;
271
- /* margin-top: 0; */
272
- z-index: 9999;
273
- pointer-events: auto;
274
- height: 200px;
275
  }
276
- #config-button{
 
 
277
  background: none;
278
  border: none;
279
  padding: 8px;
@@ -296,155 +329,231 @@ label.selected{
296
  background-color: rgba(0, 0, 0, 0.1);
297
  }
298
 
299
- #checkbox-config{
300
- display: block;
301
- position: absolute;
302
- background: none;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  border: none;
304
- padding: 8px;
 
 
 
 
 
 
 
 
 
 
305
  cursor: pointer;
306
- width: 40px;
307
- height: 40px;
308
- display: flex;
309
- align-items: center;
310
- justify-content: center;
311
- border-radius: 50%;
312
- transition: background-color 0.2s;
313
- font-size: 20px;
314
  text-align: center;
315
  }
316
- #checkbox-config:checked{
317
- display: block;
 
 
318
  }
319
 
 
 
 
 
320
 
 
 
 
 
321
 
322
- @media screen and (min-width: 1024px) {
323
- /* Additional style for scrollable tab content */
324
- /* div#tab-recommended_content {
325
- overflow-y: auto;
326
- max-height: 80vh;
327
- } */
328
 
329
- .gradio-container {
330
- max-height: calc(100vh - 190px) !important;
331
- overflow: hidden;
332
- }
333
- /* div#chatbot{
334
- height:calc(100vh - 170px) !important;
335
- max-height:calc(100vh - 170px) !important;
336
 
337
- } */
 
 
 
 
338
 
 
 
 
 
339
 
340
-
341
- div#tab-examples{
342
- height:calc(100vh - 190px) !important;
343
- overflow-y: scroll !important;
344
- /* overflow-y: auto; */
345
- }
346
 
347
- div#sources-textbox{
348
- height:calc(100vh - 190px) !important;
349
- overflow-y: scroll !important;
350
- /* overflow-y: auto !important; */
351
- }
352
- div#graphs-container{
353
- height:calc(100vh - 210px) !important;
354
- overflow-y: scroll !important;
355
- }
356
 
357
- div#sources-figures{
358
- height:calc(100vh - 300px) !important;
359
- max-height: 90vh !important;
360
- overflow-y: scroll !important;
361
- }
362
 
363
- div#graphs-container{
364
- height:calc(100vh - 300px) !important;
365
- max-height: 90vh !important;
366
- overflow-y: scroll !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  }
368
 
369
- div#tab-citations{
370
- height:calc(100vh - 300px) !important;
371
- max-height: 90vh !important;
 
372
  overflow-y: scroll !important;
373
  }
374
 
375
- div#tab-config{
376
- height:calc(100vh - 190px) !important;
 
 
 
377
  overflow-y: scroll !important;
378
- /* overflow-y: auto !important; */
379
  }
380
 
381
- /* Force container to respect height limits */
382
- .main-component{
383
- contain: size layout;
384
- overflow: hidden;
385
  }
386
 
387
-
388
- div#chatbot-row{
389
- max-height:calc(100vh - 90px) !important;
390
  }
391
- /*
392
 
393
-
394
- .max-height{
395
- height:calc(100vh - 90px) !important;
396
- max-height:calc(100vh - 90px) !important;
397
  overflow-y: auto;
 
398
  }
399
- */
400
-
401
  }
402
 
403
- footer {
404
- visibility: hidden;
405
- display:none !important;
406
- }
407
-
408
-
409
  @media screen and (max-width: 767px) {
410
- /* Your mobile-specific styles go here */
411
-
412
- div#chatbot{
413
- height:500px !important;
414
  }
415
 
416
- #submit-button{
417
- padding:0px !important;
418
  min-width: 80px;
419
  }
420
 
421
- /* This will hide all list items */
422
  div.tab-nav button {
423
  display: none !important;
424
  }
425
 
426
- /* This will show only the first list item */
427
- div.tab-nav button:first-child {
428
- display: block !important;
429
- }
430
-
431
- /* This will show only the first list item */
432
  div.tab-nav button:nth-child(2) {
433
  display: block !important;
434
  }
435
-
436
- #right-panel button{
437
  display: block !important;
438
  }
439
 
440
- /* ... add other mobile-specific styles ... */
 
 
 
 
 
 
 
 
441
  }
442
 
 
443
  @media (prefers-color-scheme: dark) {
444
- .card{
445
  background-color: #374151;
446
  }
447
- .card-image > .card-content{
 
448
  background-color: rgb(55, 65, 81) !important;
449
  }
450
 
@@ -452,251 +561,48 @@ footer {
452
  background-color: #404652;
453
  }
454
 
455
- .container > .wrap{
456
  background-color: #374151 !important;
457
- color:white !important;
458
  }
459
- .card-content h2{
460
- color:#e7754f !important;
461
- }
462
- .doc-ref sup{
463
- color:rgb(235 109 35)!important;
464
- /* margin-right:1px; */
465
  }
 
466
  .card-footer span {
467
- color:white !important;
468
  }
469
-
470
- }
471
-
472
-
473
- .doc-ref{
474
- color:#dc2626!important;
475
- margin-right:1px;
476
- }
477
-
478
- .tabitem{
479
- border:none !important;
480
- }
481
-
482
- .other-tabs > div{
483
- padding-left:40px;
484
- padding-right:40px;
485
- padding-top:10px;
486
- }
487
 
488
- .gallery-item > div{
489
- white-space: normal !important; /* Allow the text to wrap */
490
- word-break: break-word !important; /* Break words to prevent overflow */
491
- overflow-wrap: break-word !important; /* Break long words if necessary */
492
- }
493
-
494
- span.chatbot > p > img{
495
- margin-top:40px !important;
496
- max-height: none !important;
497
- max-width: 80% !important;
498
- border-radius:0px !important;
499
- }
500
-
501
-
502
- .chatbot-caption{
503
- font-size:11px;
504
- font-style:italic;
505
- color:#508094;
506
- }
507
-
508
- .ai-generated{
509
- font-size:11px!important;
510
- font-style:italic;
511
- color:#73b8d4 !important;
512
- }
513
-
514
- .card-image > .card-content{
515
- background-color:#f1f7fa;
516
- }
517
-
518
-
519
-
520
- .tab-nav > button.selected{
521
- color:#4b8ec3;
522
- font-weight:bold;
523
- border:none;
524
- }
525
-
526
- .tab-nav{
527
- border:none !important;
528
- }
529
-
530
- #input-textbox > label > textarea{
531
- border-radius:40px;
532
- padding-left:30px;
533
- resize:none;
534
- }
535
-
536
- #input-message > div{
537
- border:none;
538
- }
539
-
540
- #dropdown-samples{
541
-
542
- background:none !important;
543
-
544
- }
545
-
546
- #dropdown-samples > .container > .wrap{
547
- background-color:white;
548
- }
549
-
550
-
551
- #tab-examples > div > .form{
552
- border:none;
553
- background:none !important;
554
- }
555
 
556
- .a-doc-ref{
557
- text-decoration: none !important;
 
558
  }
559
 
560
-
561
- .dropdown {
562
- position: relative;
563
- display:inline-block;
564
- margin-bottom: 10px;
565
- }
566
-
567
- .dropdown-toggle {
568
- background-color: #f2f2f2;
569
- color: black;
570
- padding: 10px;
571
- font-size: 16px;
572
- cursor: pointer;
573
- display: block;
574
- width: 400px; /* Adjust width as needed */
575
- position: relative;
576
- display: flex;
577
- align-items: center; /* Vertically center the contents */
578
- justify-content: left;
579
- }
580
-
581
- .dropdown-toggle .caret {
582
- content: "";
583
- position: absolute;
584
- right: 10px;
585
- top: 50%;
586
- border-left: 5px solid transparent;
587
- border-right: 5px solid transparent;
588
- border-top: 5px solid black;
589
- transform: translateY(-50%);
590
- }
591
-
592
- input[type="checkbox"] {
593
- display: none !important;
594
- }
595
-
596
- input[type="checkbox"]:checked + .dropdown-content {
597
  display: block;
598
- }
599
-
600
- #checkbox-chat input[type="checkbox"] {
601
- display: flex !important;
602
- }
603
-
604
- .dropdown-content {
605
- display: none;
606
  position: absolute;
607
- background-color: #f9f9f9;
608
- min-width: 300px;
609
- box-shadow: 0 8px 16px 0 rgba(0,0,0,0.2);
610
- z-index: 1;
611
- padding: 12px;
612
- border: 1px solid #ccc;
613
- }
614
-
615
- input[type="checkbox"]:checked + .dropdown-toggle + .dropdown-content {
616
- display: block;
617
- }
618
-
619
- input[type="checkbox"]:checked + .dropdown-toggle .caret {
620
- border-top: 0;
621
- border-bottom: 5px solid black;
622
- }
623
-
624
- .loader {
625
- border: 1px solid #d0d0d0 !important; /* Light grey background */
626
- border-top: 1px solid #db3434 !important; /* Blue color */
627
- border-right: 1px solid #3498db !important; /* Blue color */
628
  border-radius: 50%;
629
- width: 20px;
630
- height: 20px;
631
- animation: spin 2s linear infinite;
632
- display:inline-block;
633
- margin-right:10px !important;
634
- }
635
-
636
- .checkmark{
637
- color:green !important;
638
- font-size:18px;
639
- margin-right:10px !important;
640
- }
641
-
642
- @keyframes spin {
643
- 0% { transform: rotate(0deg); }
644
- 100% { transform: rotate(360deg); }
645
- }
646
-
647
-
648
- .relevancy-score{
649
- margin-top:10px !important;
650
- font-size:10px !important;
651
- font-style:italic;
652
- }
653
-
654
- .score-green{
655
- color:green !important;
656
- }
657
-
658
- .score-orange{
659
- color:orange !important;
660
- }
661
-
662
- .score-red{
663
- color:red !important;
664
- }
665
-
666
- /* Mobile specific adjustments */
667
- @media screen and (max-width: 767px) {
668
- div#tab-recommended_content {
669
- max-height: 50vh; /* Reduce height for smaller screens */
670
- overflow-y: auto;
671
- }
672
- }
673
-
674
- /* Additional style for scrollable tab content */
675
- div#tab-saved-graphs {
676
- overflow-y: auto; /* Enable vertical scrolling */
677
- max-height: 80vh; /* Adjust height as needed */
678
- }
679
-
680
- /* Mobile specific adjustments */
681
- @media screen and (max-width: 767px) {
682
- div#tab-saved-graphs {
683
- max-height: 50vh; /* Reduce height for smaller screens */
684
- overflow-y: auto;
685
- }
686
- }
687
- .message-buttons-left.panel.message-buttons.with-avatar {
688
- display: none;
689
- }
690
-
691
-
692
- /* Specific fixes for Hugging Face Space iframe */
693
- .h-full {
694
- height: auto !important;
695
- min-height: 0 !important;
696
  }
697
 
698
- .space-content {
699
- height: auto !important;
700
- max-height: 100vh !important;
701
- overflow: hidden;
702
  }
 
1
+ /* Root Variables */
2
  /* :root {
3
  --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
4
+ } */
5
+
6
+ /* Layout & Container Styles */
7
+ .gradio-container {
8
+ width: 100% !important;
9
+ max-width: 100% !important;
10
+ }
11
+
12
+ main.flex.flex-1.flex-col {
13
+ max-height: 95vh !important;
14
+ }
15
+
16
+ .main-component {
17
+ contain: size layout;
18
+ overflow: hidden;
19
+ }
20
 
21
+ /* Tab Styles */
22
+ #tab-recommended_content {
23
+ padding: 0;
 
24
  }
25
+
26
  #group-subtabs {
27
  /* display: block; */
 
28
  position : sticky;
29
  }
30
 
 
 
 
 
 
31
 
 
 
32
  }
33
 
34
+ .tab-nav {
35
+ border: none !important;
36
+ }
37
 
38
+ .tab-nav > button.selected {
39
+ color: #4b8ec3;
 
40
  font-weight: bold;
41
+ border: none;
42
+ }
43
 
44
+ .tabitem {
45
+ border: none !important;
46
  }
47
 
48
+ .other-tabs > div {
49
+ padding: 40px 40px 10px;
 
 
 
50
  }
51
 
52
+ /* Card Styles */
53
+ .card {
54
+ background-color: white;
55
+ border-radius: 10px;
56
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
57
+ overflow: hidden;
58
+ display: flex;
59
+ flex-direction: column;
60
+ margin: 20px;
61
+ }
62
 
63
+ .card-content {
64
+ padding: 20px;
65
+ }
66
 
67
+ .card-content h2 {
68
+ font-size: 14px !important;
 
69
  font-weight: bold;
70
+ margin: 0 0 10px !important;
71
+ color: #dc2626 !important;
72
+ }
73
+
74
+ .card-content p {
75
+ font-size: 12px;
76
+ margin-bottom: 0;
77
+ }
78
+
79
+ .card-content img {
80
+ display: block;
81
+ margin: auto;
82
+ max-width: 100%;
83
+ height: auto;
84
+ }
85
+
86
+ .card-footer {
87
+ background-color: #f4f4f4;
88
+ font-size: 10px;
89
+ padding: 10px;
90
+ display: flex;
91
+ justify-content: space-between;
92
+ align-items: center;
93
+ }
94
+
95
+ .card-footer span {
96
+ flex-grow: 1;
97
  text-align: left;
98
+ color: #999 !important;
99
  }
100
 
101
+ .card-image > .card-content {
102
+ background-color: #f1f7fa;
103
+ }
104
 
105
+ /* Message & Chat Styles */
106
+ .message {
107
+ font-size: 14px !important;
108
  }
109
 
110
+ .message.user, .message.bot {
111
+ border: none;
 
112
  }
113
 
114
+ #input-textbox > label > textarea {
115
+ border-radius: 40px;
116
+ padding-left: 30px;
117
+ resize: none;
 
 
 
 
 
118
  }
119
 
120
+ #input-message > div {
121
+ border: none;
 
 
 
 
 
122
  }
123
 
124
+ /* Alert Boxes */
125
  .warning-box {
126
  background-color: #fff3cd;
127
  border: 1px solid #ffeeba;
 
131
  color: #856404;
132
  display: inline-block;
133
  margin-bottom: 15px;
134
+ }
 
135
 
136
  .tip-box {
137
  background-color: #f0f9ff;
138
  border: 1px solid #80d4fa;
139
  border-radius: 4px;
140
+ margin: 20px 0 15px;
141
  padding: 15px 20px;
142
  font-size: 14px;
143
  display: inline-block;
 
144
  width: auto;
145
+ color: black !important;
 
 
 
 
 
 
 
 
 
146
  }
147
 
 
148
  .tip-box-title {
149
  font-weight: bold;
150
  font-size: 14px;
 
156
  margin-right: 5px;
157
  }
158
 
159
+ /* Loader Animation */
160
+ .loader {
161
+ border: 1px solid #d0d0d0 !important;
162
+ border-top: 1px solid #db3434 !important;
163
+ border-right: 1px solid #3498db !important;
164
+ border-radius: 50%;
165
+ width: 20px;
166
+ height: 20px;
167
+ animation: spin 2s linear infinite;
168
+ display: inline-block;
169
+ margin-right: 10px !important;
170
  }
171
 
172
+ @keyframes spin {
173
+ 0% { transform: rotate(0deg); }
174
+ 100% { transform: rotate(360deg); }
 
 
 
 
 
 
175
  }
176
 
177
+ /* PDF Link Styles */
178
+ .pdf-link {
179
+ display: inline-flex;
180
+ align-items: center;
181
+ margin-left: auto;
182
+ text-decoration: none!important;
183
+ font-size: 14px;
184
  }
185
 
186
+ /* Document Reference Styles */
187
+ .doc-ref sup {
188
+ color: #dc2626!important;
189
  }
190
 
191
+ .doc-ref {
192
+ color: #dc2626!important;
193
+ margin-right: 1px;
194
+ }
195
 
196
+ /* Chatbot & Image Styles */
197
+ span.chatbot > p > img {
198
+ margin-top: 40px !important;
199
+ max-height: none !important;
200
+ max-width: 80% !important;
201
+ border-radius: 0px !important;
 
 
202
  }
203
 
204
+ .chatbot-caption {
205
+ font-size: 11px;
206
+ font-style: italic;
207
+ color: #508094;
208
  }
209
 
210
+ .ai-generated {
211
+ font-size: 11px!important;
212
+ font-style: italic;
213
+ color: #73b8d4 !important;
 
 
214
  }
215
 
216
+ /* Dropdown Styles */
217
+ .dropdown {
218
+ position: relative;
219
+ display: inline-block;
220
+ margin-bottom: 10px;
221
  }
222
 
223
+ .dropdown-toggle {
224
+ background-color: #f2f2f2;
225
+ color: black;
226
  padding: 10px;
227
+ font-size: 16px;
228
+ cursor: pointer;
229
  display: flex;
230
+ width: 400px;
231
  align-items: center;
232
+ justify-content: left;
233
+ position: relative;
234
  }
235
 
236
+ .dropdown-toggle .caret {
237
+ content: "";
238
+ position: absolute;
239
+ right: 10px;
240
+ top: 50%;
241
+ border-left: 5px solid transparent;
242
+ border-right: 5px solid transparent;
243
+ border-top: 5px solid black;
244
+ transform: translateY(-50%);
245
  }
246
 
247
+ .dropdown-content {
248
+ display: none;
249
+ position: absolute;
250
+ background-color: #f9f9f9;
251
+ min-width: 300px;
252
+ box-shadow: 0 8px 16px 0 rgba(0,0,0,0.2);
253
+ z-index: 1;
254
+ padding: 12px;
255
+ border: 1px solid #ccc;
256
  }
257
 
258
+ /* Checkbox Styles */
259
+ input[type="checkbox"] {
260
+ display: none !important;
 
 
 
261
  }
262
 
263
+ #checkbox-chat input[type="checkbox"] {
264
+ display: flex !important;
 
265
  }
266
 
267
+ input[type="checkbox"]:checked + .dropdown-content {
268
+ display: block;
 
269
  }
270
 
271
+ input[type="checkbox"]:checked + .dropdown-toggle + .dropdown-content {
272
+ display: block;
273
  }
274
 
275
+ input[type="checkbox"]:checked + .dropdown-toggle .caret {
276
+ border-top: 0;
277
+ border-bottom: 5px solid black;
 
 
 
 
 
278
  }
279
+
280
+ /* Modal Styles */
281
  #modal-config {
282
  position: fixed;
283
  top: 0;
 
290
  padding: 15px;
291
  transform: none;
292
  }
293
+
294
+ #modal-config .block.modal-block.padded {
295
+ padding-top: 25px;
296
+ height: 100vh;
297
  }
298
 
299
+ #modal-config .modal-container {
300
+ margin: 0px;
301
+ padding: 0px;
302
+ }
 
303
 
304
+ #modal-config .close {
305
+ display: none;
 
 
 
 
 
 
 
 
306
  }
307
+
308
+ /* Config Button Styles */
309
+ #config-button {
310
  background: none;
311
  border: none;
312
  padding: 8px;
 
329
  background-color: rgba(0, 0, 0, 0.1);
330
  }
331
 
332
+ /* Relevancy Score Styles */
333
+ .relevancy-score {
334
+ margin-top: 10px !important;
335
+ font-size: 10px !important;
336
+ font-style: italic;
337
+ }
338
+
339
+ .score-green {
340
+ color: green !important;
341
+ }
342
+
343
+ .score-orange {
344
+ color: orange !important;
345
+ }
346
+
347
+ .score-red {
348
+ color: red !important;
349
+ }
350
+
351
+ /* Gallery Styles */
352
+ .gallery-item > div {
353
+ white-space: normal !important;
354
+ word-break: break-word !important;
355
+ overflow-wrap: break-word !important;
356
+ }
357
+
358
+ /* Avatar Styles */
359
+ .avatar-container.svelte-1x5p6hu:not(.thumbnail-item) img {
360
+ width: 100%;
361
+ height: 100%;
362
+ object-fit: cover;
363
+ border-radius: 50%;
364
+ padding: 0px;
365
+ margin: 0px;
366
+ }
367
+
368
+ /* Message Button Styles */
369
+ .message-buttons-left.panel.message-buttons.with-avatar {
370
+ display: none;
371
+ }
372
+
373
+ /* Checkmark Styles */
374
+ .checkmark {
375
+ color: green !important;
376
+ font-size: 18px;
377
+ margin-right: 10px !important;
378
+ }
379
+
380
+ /* Papers Summary & Relevant Popup Styles */
381
+ #papers-summary-popup button span,
382
+ #papers-relevant-popup span {
383
+ font-size: 16px;
384
+ font-weight: bold;
385
+ text-align: center;
386
+ }
387
+
388
+ /* Citations Tab Button Style */
389
+ #tab-citations .button {
390
+ padding: 12px 16px;
391
+ font-size: 16px;
392
+ font-weight: bold;
393
+ cursor: pointer;
394
  border: none;
395
+ outline: none;
396
+ text-align: left;
397
+ transition: background-color 0.3s ease;
398
+ }
399
+
400
+ /* Show Figures Button Style */
401
+ button#show-figures {
402
+ background-color: #f5f5f5;
403
+ border: 1px solid #e0e0e0;
404
+ border-radius: 4px;
405
+ color: #333333;
406
  cursor: pointer;
407
+ width: 100%;
 
 
 
 
 
 
 
408
  text-align: center;
409
  }
410
+
411
+ /* Gradio Box Style */
412
+ .gr-box {
413
+ border-color: #d6c37c;
414
  }
415
 
416
+ /* Hidden Message Style */
417
+ #hidden-message {
418
+ display: none;
419
+ }
420
 
421
+ /* Label Selected Style */
422
+ label.selected {
423
+ background: #93c5fd !important;
424
+ }
425
 
426
+ /* Submit Button Style */
427
+ #submit-button {
428
+ padding: 0px !important;
429
+ }
 
 
430
 
431
+ /* Hugging Face Space Fixes */
432
+ .h-full {
433
+ height: auto !important;
434
+ min-height: 0 !important;
435
+ }
 
 
436
 
437
+ .space-content {
438
+ height: auto !important;
439
+ max-height: 100vh !important;
440
+ overflow: hidden;
441
+ }
442
 
443
+ /* Dropdown Samples Style */
444
+ #dropdown-samples {
445
+ background: none !important;
446
+ }
447
 
448
+ #dropdown-samples > .container > .wrap {
449
+ background-color: white;
450
+ }
 
 
 
451
 
452
+ /* Tab Examples Form Style */
453
+ #tab-examples > div > .form {
454
+ border: none;
455
+ background: none !important;
456
+ }
 
 
 
 
457
 
458
+ /* Utility Classes */
459
+ .hidden {
460
+ display: none !important;
461
+ }
 
462
 
463
+ footer {
464
+ display: none !important;
465
+ visibility: hidden;
466
+ }
467
+
468
+ a {
469
+ text-decoration: none;
470
+ color: inherit;
471
+ }
472
+
473
+ .a-doc-ref {
474
+ text-decoration: none !important;
475
+ }
476
+
477
+ /* Media Queries */
478
+ /* Desktop Media Query */
479
+ @media screen and (min-width: 1024px) {
480
+ .gradio-container {
481
+ max-height: calc(100vh - 190px) !important;
482
+ overflow: hidden;
483
  }
484
 
485
+ div#tab-examples,
486
+ div#sources-textbox,
487
+ div#tab-config {
488
+ height: calc(100vh - 190px) !important;
489
  overflow-y: scroll !important;
490
  }
491
 
492
+ div#sources-figures,
493
+ div#graphs-container,
494
+ div#tab-citations {
495
+ height: calc(100vh - 300px) !important;
496
+ max-height: 90vh !important;
497
  overflow-y: scroll !important;
 
498
  }
499
 
500
+ div#chatbot-row {
501
+ max-height: calc(100vh - 90px) !important;
 
 
502
  }
503
 
504
+ div#graphs-container {
505
+ height: calc(100vh - 210px) !important;
506
+ overflow-y: scroll !important;
507
  }
 
508
 
509
+ div#tab-saved-graphs {
 
 
 
510
  overflow-y: auto;
511
+ max-height: 80vh;
512
  }
 
 
513
  }
514
 
515
+ /* Mobile Media Query */
 
 
 
 
 
516
  @media screen and (max-width: 767px) {
517
+ div#chatbot {
518
+ height: 500px !important;
 
 
519
  }
520
 
521
+ #submit-button {
522
+ padding: 0 !important;
523
  min-width: 80px;
524
  }
525
 
 
526
  div.tab-nav button {
527
  display: none !important;
528
  }
529
 
530
+ div.tab-nav button:first-child,
 
 
 
 
 
531
  div.tab-nav button:nth-child(2) {
532
  display: block !important;
533
  }
534
+
535
+ #right-panel button {
536
  display: block !important;
537
  }
538
 
539
+ div#tab-recommended_content {
540
+ max-height: 50vh;
541
+ overflow-y: auto;
542
+ }
543
+
544
+ div#tab-saved-graphs {
545
+ max-height: 50vh;
546
+ overflow-y: auto;
547
+ }
548
  }
549
 
550
+ /* Dark Mode */
551
  @media (prefers-color-scheme: dark) {
552
+ .card {
553
  background-color: #374151;
554
  }
555
+
556
+ .card-image > .card-content {
557
  background-color: rgb(55, 65, 81) !important;
558
  }
559
 
 
561
  background-color: #404652;
562
  }
563
 
564
+ .container > .wrap {
565
  background-color: #374151 !important;
566
+ color: white !important;
567
  }
568
+
569
+ .card-content h2 {
570
+ color: #e7754f !important;
 
 
 
571
  }
572
+
573
  .card-footer span {
574
+ color: white !important;
575
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
576
 
577
+ body.dark .warning-box *,
578
+ body.dark .tip-box * {
579
+ color: black !important;
580
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
 
582
+ .doc-ref sup {
583
+ color: rgb(235 109 35)!important;
584
+ }
585
  }
586
 
587
+ /* Checkbox Config Style */
588
+ #checkbox-config {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
589
  display: block;
 
 
 
 
 
 
 
 
590
  position: absolute;
591
+ background: none;
592
+ border: none;
593
+ padding: 8px;
594
+ cursor: pointer;
595
+ width: 40px;
596
+ height: 40px;
597
+ display: flex;
598
+ align-items: center;
599
+ justify-content: center;
 
 
 
 
 
 
 
 
 
 
 
 
600
  border-radius: 50%;
601
+ transition: background-color 0.2s;
602
+ font-size: 20px;
603
+ text-align: center;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
  }
605
 
606
+ #checkbox-config:checked {
607
+ display: block;
 
 
608
  }