Spaces:
GIZ
/
Running on CPU Upgrade

ppsingh committed on
Commit
07d2ba0
·
1 Parent(s): 2bd2e9b
app.py CHANGED
@@ -6,6 +6,8 @@ from uuid import uuid4
6
  from gradio_client import Client, handle_file
7
  from utils.retriever import retrieve_paragraphs
8
  from utils.generator import generate
 
 
9
 
10
  # Sample questions for examples
11
  SAMPLE_QUESTIONS = {
@@ -33,6 +35,31 @@ def finish_chat():
33
  """Finish chat and reset input"""
34
  return gr.update(interactive=True, value="")
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  async def chat_response(query, history, category):
37
  """Generate chat response based on method and inputs"""
38
 
@@ -72,19 +99,32 @@ async def chat_response(query, history, category):
72
 
73
  # # Handle "Talk to Reports"
74
  # else:
75
- try:
76
- retrieved_paragraphs = retrieve_paragraphs(query, category)
77
- response = await generate(query=query, context=retrieved_paragraphs)
78
-
79
- except Exception as e:
80
- response = f"Error retrieving information: {str(e)}"
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  displayed_response = ""
83
 
84
  for i, char in enumerate(response):
85
  displayed_response += char
86
  history[-1] = (query, displayed_response)
87
- yield history, "**Sources:** Sample source documents would appear here..."
88
  # Only add delay every few characters to avoid being too slow
89
  if i % 3 == 0: # Adjust this number to control speed
90
  await asyncio.sleep(0.02)
 
6
  from gradio_client import Client, handle_file
7
  from utils.retriever import retrieve_paragraphs
8
  from utils.generator import generate
9
+ import json
10
+ import ast
11
 
12
  # Sample questions for examples
13
  SAMPLE_QUESTIONS = {
 
35
  """Finish chat and reset input"""
36
  return gr.update(interactive=True, value="")
37
 
38
def make_html_source(source, i):
    """
    Render one retrieved paragraph as an HTML card for the "source" side tab.

    Args:
        source: mapping with an 'answer' entry (the paragraph text) and an
            'answer_metadata' mapping that provides 'filename' and 'page'.
        i: document number shown in the card heading and used to build the
            card's ``doc{i}`` element id.

    Returns:
        The HTML markup of the card as a string.

    Raises:
        KeyError: if 'answer', 'answer_metadata', 'filename' or 'page' is missing.
        TypeError, ValueError: if the page value cannot be converted with int().
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    meta = source['answer_metadata']

    # Paragraph text and file names come from retrieved documents, not from
    # this app, so they are escaped before being placed into markup. Without
    # this, a stray "<", "&" or quote in a document would break the card (or
    # inject markup into the page).
    content = escape(source['answer'].strip())
    name = escape(str(meta['filename']))  # quote=True also makes it safe inside href="..."
    page = int(meta['page'])  # page may arrive as e.g. 3.0; display it as 3

    card = f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i} - {name} - Page {page}</h2>
            <p>{content}</p>
        </div>
        <div class="card-footer">
            <span>{name}</span>
            <a href="{name}#page={page}" target="_blank" class="pdf-link">
                <span role="img" aria-label="Open PDF">🔗</span>
            </a>
        </div>
    </div>
    """

    return card
62
+
63
  async def chat_response(query, history, category):
64
  """Generate chat response based on method and inputs"""
65
 
 
99
 
100
  # # Handle "Talk to Reports"
101
  # else:
 
 
 
 
 
 
102
 
103
+ retrieved_paragraphs = retrieve_paragraphs(query, category)
104
+ context_retrieved = ast.literal_eval(retrieved_paragraphs)
105
+ print(retrieved_paragraphs)
106
+ # print(type(retrieved_paragraphs))
107
+ # The API returns its output as a string, so it is first parsed into Python objects (done with ast.literal_eval above; the json.loads variant below is kept commented out)
108
+ # context_retrieved = json.loads(retrieved_paragraphs)
109
+ # print("converting context to json")
110
+ # building list of only content, no metadata
111
+ context_retrieved_formatted = "||".join(doc['answer'] for doc in context_retrieved)
112
+ context_retrieved_lst = [doc['answer'] for doc in context_retrieved]
113
+ print(context_retrieved_lst)
114
+ ## -----------------Prepare HTML for displaying source documents --------------
115
+ docs_html = []
116
+ for i, d in enumerate(context_retrieved, 1):
117
+ docs_html.append(make_html_source(d, i))
118
+ docs_html = "".join(docs_html)
119
+ response = await generate(query=query, context=context_retrieved_lst)
120
+
121
+
122
  displayed_response = ""
123
 
124
  for i, char in enumerate(response):
125
  displayed_response += char
126
  history[-1] = (query, displayed_response)
127
+ yield history, docs_html
128
  # Only add delay every few characters to avoid being too slow
129
  if i % 3 == 0: # Adjust this number to control speed
130
  await asyncio.sleep(0.02)
utils/__pycache__/retriever.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/retriever.cpython-310.pyc and b/utils/__pycache__/retriever.cpython-310.pyc differ
 
utils/generator.py CHANGED
@@ -113,67 +113,67 @@ chat_model = get_chat_model()
113
  # ---------------------------------------------------------------------
114
  # Context processing - may need further refinement (i.e. to manage other data sources)
115
  # ---------------------------------------------------------------------
116
- def extract_relevant_fields(retrieval_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
117
- """
118
- Extract only relevant fields from retrieval results.
119
 
120
- Args:
121
- retrieval_results: List of JSON objects from retriever
122
 
123
- Returns:
124
- List of processed objects with only relevant fields
125
- """
126
 
127
- retrieval_results = ast.literal_eval(retrieval_results)
128
 
129
- processed_results = []
130
 
131
- for result in retrieval_results:
132
- # Extract the answer content
133
- answer = result.get('answer', '')
134
 
135
- # Extract document identification from metadata
136
- metadata = result.get('answer_metadata', {})
137
- doc_info = {
138
- 'answer': answer,
139
- 'filename': metadata.get('filename', 'Unknown'),
140
- 'page': metadata.get('page', 'Unknown'),
141
- 'year': metadata.get('year', 'Unknown'),
142
- 'source': metadata.get('source', 'Unknown'),
143
- 'document_id': metadata.get('_id', 'Unknown')
144
- }
145
 
146
- processed_results.append(doc_info)
147
 
148
- return processed_results
149
 
150
- def format_context_from_results(processed_results: List[Dict[str, Any]]) -> str:
151
- """
152
- Format processed retrieval results into a context string for the LLM.
153
 
154
- Args:
155
- processed_results: List of processed objects with relevant fields
156
 
157
- Returns:
158
- Formatted context string
159
- """
160
- if not processed_results:
161
- return ""
162
 
163
- context_parts = []
164
 
165
- for i, result in enumerate(processed_results, 1):
166
- doc_reference = f"[Document {i}: {result['filename']}"
167
- if result['page'] != 'Unknown':
168
- doc_reference += f", Page {result['page']}"
169
- if result['year'] != 'Unknown':
170
- doc_reference += f", Year {result['year']}"
171
- doc_reference += "]"
172
 
173
- context_part = f"{doc_reference}\n{result['answer']}\n"
174
- context_parts.append(context_part)
175
 
176
- return "\n".join(context_parts)
177
 
178
  # ---------------------------------------------------------------------
179
  # Core generation function for both Gradio UI and MCP
@@ -264,12 +264,12 @@ async def generate(query: str, context: Union[str, List[Dict[str, Any]]]) -> str
264
  if not context:
265
  return "Error: No retrieval results provided"
266
 
267
- # Process the retrieval results
268
- processed_results = extract_relevant_fields(context)
269
- formatted_context = format_context_from_results(processed_results)
270
 
271
- if not formatted_context.strip():
272
- return "Error: No valid content found in retrieval results"
273
 
274
  elif isinstance(context, str):
275
  if not context.strip():
 
113
  # ---------------------------------------------------------------------
114
  # Context processing - may need further refinement (i.e. to manage other data sources)
115
  # ---------------------------------------------------------------------
116
+ # def extract_relevant_fields(retrieval_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
117
+ # """
118
+ # Extract only relevant fields from retrieval results.
119
 
120
+ # Args:
121
+ # retrieval_results: List of JSON objects from retriever
122
 
123
+ # Returns:
124
+ # List of processed objects with only relevant fields
125
+ # """
126
 
127
+ # retrieval_results = ast.literal_eval(retrieval_results)
128
 
129
+ # processed_results = []
130
 
131
+ # for result in retrieval_results:
132
+ # # Extract the answer content
133
+ # answer = result.get('answer', '')
134
 
135
+ # # Extract document identification from metadata
136
+ # metadata = result.get('answer_metadata', {})
137
+ # doc_info = {
138
+ # 'answer': answer,
139
+ # 'filename': metadata.get('filename', 'Unknown'),
140
+ # 'page': metadata.get('page', 'Unknown'),
141
+ # 'year': metadata.get('year', 'Unknown'),
142
+ # 'source': metadata.get('source', 'Unknown'),
143
+ # 'document_id': metadata.get('_id', 'Unknown')
144
+ # }
145
 
146
+ # processed_results.append(doc_info)
147
 
148
+ # return processed_results
149
 
150
+ # def format_context_from_results(processed_results: List[Dict[str, Any]]) -> str:
151
+ # """
152
+ # Format processed retrieval results into a context string for the LLM.
153
 
154
+ # Args:
155
+ # processed_results: List of processed objects with relevant fields
156
 
157
+ # Returns:
158
+ # Formatted context string
159
+ # """
160
+ # if not processed_results:
161
+ # return ""
162
 
163
+ # context_parts = []
164
 
165
+ # for i, result in enumerate(processed_results, 1):
166
+ # doc_reference = f"[Document {i}: {result['filename']}"
167
+ # if result['page'] != 'Unknown':
168
+ # doc_reference += f", Page {result['page']}"
169
+ # if result['year'] != 'Unknown':
170
+ # doc_reference += f", Year {result['year']}"
171
+ # doc_reference += "]"
172
 
173
+ # context_part = f"{doc_reference}\n{result['answer']}\n"
174
+ # context_parts.append(context_part)
175
 
176
+ # return "\n".join(context_parts)
177
 
178
  # ---------------------------------------------------------------------
179
  # Core generation function for both Gradio UI and MCP
 
264
  if not context:
265
  return "Error: No retrieval results provided"
266
 
267
+ # # Process the retrieval results
268
+ # processed_results = extract_relevant_fields(context)
269
+ formatted_context = context
270
 
271
+ # if not formatted_context.strip():
272
+ # return "Error: No valid content found in retrieval results"
273
 
274
  elif isinstance(context, str):
275
  if not context.strip():
utils/retriever.py CHANGED
@@ -19,7 +19,7 @@ def retrieve_paragraphs(query, category = None):
19
  api_name="/retrieve"
20
  )
21
  return result
22
-
23
  except Exception as e:
24
  error_msg = f"Error retrieving paragraphs: {str(e)}"
25
  return (
 
19
  api_name="/retrieve"
20
  )
21
  return result
22
+
23
  except Exception as e:
24
  error_msg = f"Error retrieving paragraphs: {str(e)}"
25
  return (