Spaces: GIZ / Running on CPU Upgrade

ppsingh committed · Commit c464ae6 · 1 Parent(s): 4b1c32c

UI fixes and sources fixes

app.py CHANGED

@@ -7,6 +7,8 @@ from gradio_client import Client, handle_file
 from utils.whisp_api import handle_geojson_upload
 from utils.retriever import retrieve_paragraphs
 from utils.generator import generate
+import json
+import ast
 
 # Sample questions for examples
 SAMPLE_QUESTIONS = {
@@ -45,6 +47,32 @@ def finish_chat():
     """Finish chat and reset input"""
     return gr.update(interactive=True, value="")
 
+
+def make_html_source(source,i):
+    """
+    takes the text and converts it into html format for display in "source" side tab
+    """
+    meta = source['answer_metadata']
+    content = source['answer'].strip()
+
+    name = meta['filename']
+    card = f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['filename']} - Page {int(meta['page'])}</h2>
+            <p>{content}</p>
+        </div>
+        <div class="card-footer">
+            <span>{name}</span>
+            <a href="{meta['filename']}#page={int(meta['page'])}" target="_blank" class="pdf-link">
+                <span role="img" aria-label="Open PDF">🔗</span>
+            </a>
+        </div>
+    </div>
+    """
+
+    return card
+
 async def chat_response(query, history, method, country, uploaded_file):
     """Generate chat response based on method and inputs"""
 
@@ -89,8 +117,15 @@ async def chat_response(query, history, method, country, uploaded_file):
     else:
         try:
            retrieved_paragraphs = retrieve_paragraphs(query, country)
-            print(country)
-            print(retrieved_paragraphs)
+            context_retrieved = ast.literal_eval(retrieved_paragraphs)
+            context_retrieved_formatted = "||".join(doc['answer'] for doc in context_retrieved)
+            context_retrieved_lst = [doc['answer'] for doc in context_retrieved]
+            # print(country)
+            # print(retrieved_paragraphs)
+            docs_html = []
+            for i, d in enumerate(context_retrieved, 1):
+                docs_html.append(make_html_source(d, i))
+            docs_html = "".join(docs_html)
            response = await generate(query=query, context=retrieved_paragraphs)
 
         except Exception as e:
@@ -105,7 +140,7 @@ async def chat_response(query, history, method, country, uploaded_file):
     for i, char in enumerate(response_with_disclaimer):
         displayed_response += char
         history[-1] = (query, displayed_response)
-        yield history, "**Sources:** Sample source documents would appear here..."
+        yield history, docs_html
         # Only add delay every few characters to avoid being too slow
         if i % 3 == 0:  # Adjust this number to control speed
             await asyncio.sleep(0.02)
@@ -217,7 +252,7 @@ init_prompt = """
 
 """
 
-with gr.Blocks(title="EUDR Bot", theme=theme, css=custom_css) as demo:
+with gr.Blocks(title="EUDR Bot", theme=theme, css="style.css") as demo:
 
     # Main Chat Interface
     with gr.Tab("EUDR Bot"):
@@ -230,7 +265,7 @@ with gr.Blocks(title="EUDR Bot", theme=theme, css=custom_css) as demo:
             show_label=False,
             layout="panel",
             avatar_images=(None, "chatbot_icon_2.png"),
-            height=500
+            height="auto"
         )
 
         # Feedback UI
@@ -319,7 +354,7 @@ with gr.Blocks(title="EUDR Bot", theme=theme, css=custom_css) as demo:
            sample_groups.append(group_examples)
 
     # Sources Tab
-    with gr.Tab("Fuentes", id=1):
+    with gr.Tab("Fuentes", id=1, elem_id="sources-textbox"):
        sources_textbox = gr.HTML(
            show_label=False,
            value="Los documentos originales aparecerán aquí después de que hagas una pregunta..."
utils/__pycache__/generator.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/generator.cpython-310.pyc and b/utils/__pycache__/generator.cpython-310.pyc differ
 
utils/__pycache__/retriever.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/retriever.cpython-310.pyc and b/utils/__pycache__/retriever.cpython-310.pyc differ
 
utils/__pycache__/whisp_api.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/whisp_api.cpython-310.pyc and b/utils/__pycache__/whisp_api.cpython-310.pyc differ
 
utils/generator.py CHANGED

@@ -113,67 +113,67 @@ chat_model = get_chat_model()
 # ---------------------------------------------------------------------
 # Context processing - may need further refinement (i.e. to manage other data sources)
 # ---------------------------------------------------------------------
-def extract_relevant_fields(retrieval_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """
-    Extract only relevant fields from retrieval results.
+# def extract_relevant_fields(retrieval_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+#     """
+#     Extract only relevant fields from retrieval results.
 
-    Args:
-        retrieval_results: List of JSON objects from retriever
+#     Args:
+#         retrieval_results: List of JSON objects from retriever
 
-    Returns:
-        List of processed objects with only relevant fields
-    """
+#     Returns:
+#         List of processed objects with only relevant fields
+#     """
 
-    retrieval_results = ast.literal_eval(retrieval_results)
+#     retrieval_results = ast.literal_eval(retrieval_results)
 
-    processed_results = []
+#     processed_results = []
 
-    for result in retrieval_results:
-        # Extract the answer content
-        answer = result.get('answer', '')
+#     for result in retrieval_results:
+#         # Extract the answer content
+#         answer = result.get('answer', '')
 
-        # Extract document identification from metadata
-        metadata = result.get('answer_metadata', {})
-        doc_info = {
-            'answer': answer,
-            'filename': metadata.get('filename', 'Unknown'),
-            'page': metadata.get('page', 'Unknown'),
-            'year': metadata.get('year', 'Unknown'),
-            'source': metadata.get('source', 'Unknown'),
-            'document_id': metadata.get('_id', 'Unknown')
-        }
+#         # Extract document identification from metadata
+#         metadata = result.get('answer_metadata', {})
+#         doc_info = {
+#             'answer': answer,
+#             'filename': metadata.get('filename', 'Unknown'),
+#             'page': metadata.get('page', 'Unknown'),
+#             'year': metadata.get('year', 'Unknown'),
+#             'source': metadata.get('source', 'Unknown'),
+#             'document_id': metadata.get('_id', 'Unknown')
+#         }
 
-        processed_results.append(doc_info)
+#         processed_results.append(doc_info)
 
-    return processed_results
+#     return processed_results
 
-def format_context_from_results(processed_results: List[Dict[str, Any]]) -> str:
-    """
-    Format processed retrieval results into a context string for the LLM.
+# def format_context_from_results(processed_results: List[Dict[str, Any]]) -> str:
+#     """
+#     Format processed retrieval results into a context string for the LLM.
 
-    Args:
-        processed_results: List of processed objects with relevant fields
+#     Args:
+#         processed_results: List of processed objects with relevant fields
 
-    Returns:
-        Formatted context string
-    """
-    if not processed_results:
-        return ""
+#     Returns:
+#         Formatted context string
+#     """
+#     if not processed_results:
+#         return ""
 
-    context_parts = []
+#     context_parts = []
 
-    for i, result in enumerate(processed_results, 1):
-        doc_reference = f"[Document {i}: {result['filename']}"
-        if result['page'] != 'Unknown':
-            doc_reference += f", Page {result['page']}"
-        if result['year'] != 'Unknown':
-            doc_reference += f", Year {result['year']}"
-        doc_reference += "]"
+#     for i, result in enumerate(processed_results, 1):
+#         doc_reference = f"[Document {i}: {result['filename']}"
+#         if result['page'] != 'Unknown':
+#             doc_reference += f", Page {result['page']}"
+#         if result['year'] != 'Unknown':
+#             doc_reference += f", Year {result['year']}"
+#         doc_reference += "]"
 
-        context_part = f"{doc_reference}\n{result['answer']}\n"
-        context_parts.append(context_part)
+#         context_part = f"{doc_reference}\n{result['answer']}\n"
+#         context_parts.append(context_part)
 
-    return "\n".join(context_parts)
+#     return "\n".join(context_parts)
 
 # ---------------------------------------------------------------------
 # Core generation function for both Gradio UI and MCP
@@ -265,11 +265,11 @@ async def generate(query: str, context: Union[str, List[Dict[str, Any]]]) -> str
             return "Error: No retrieval results provided"
 
         # Process the retrieval results
-        processed_results = extract_relevant_fields(context)
-        formatted_context = format_context_from_results(processed_results)
+        # processed_results = extract_relevant_fields(context)
+        formatted_context = context
 
-        if not formatted_context.strip():
-            return "Error: No valid content found in retrieval results"
+        # if not formatted_context.strip():
+        #     return "Error: No valid content found in retrieval results"
 
     elif isinstance(context, str):
         if not context.strip():
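
With these changes, generate no longer parses or reformats the retrieval results itself: it takes whatever it is given as formatted_context, and the parsing has moved into app.py via ast.literal_eval. Below is a minimal sketch of that parsing step, assuming the retriever returns a Python-literal string of records (which is what the ast.literal_eval call implies); the raw payload here is hypothetical, and json is imported in this commit but not used in the hunks shown.

import ast
import json

# Hypothetical retriever payload: a Python-literal string of records rather than parsed objects.
raw = "[{'answer': 'Text of the retrieved paragraph.', 'answer_metadata': {'filename': 'doc.pdf', 'page': 3}}]"

records = ast.literal_eval(raw)            # what chat_response now does in app.py
answers = [r["answer"] for r in records]   # context_retrieved_lst in the diff
joined = "||".join(answers)                # context_retrieved_formatted in the diff

# If the retriever ever returned strict JSON (double quotes), json.loads would apply instead.
try:
    records = json.loads(raw)
except json.JSONDecodeError:
    pass  # not valid JSON; the Python-literal form above still parses with ast

print(joined)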