Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -352,6 +352,41 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
|
|
352 |
except Exception as e:
|
353 |
return f"An error occurred during summarization: {str(e)}"
|
354 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
# Modify the existing respond function to handle both PDF and web search
|
356 |
def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
|
357 |
logging.info(f"User Query: {message}")
|
@@ -381,8 +416,8 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
|
|
381 |
yield final_summary
|
382 |
else:
|
383 |
yield "Unable to generate a response. Please try a different query."
|
|
|
384 |
else:
|
385 |
-
# PDF and Office documents search logic
|
386 |
try:
|
387 |
embed = get_embeddings()
|
388 |
pdf_database = None
|
@@ -413,44 +448,29 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
|
|
413 |
yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
|
414 |
return
|
415 |
|
416 |
-
|
417 |
-
|
|
|
|
|
|
|
|
|
418 |
|
419 |
-
|
420 |
-
|
421 |
-
logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
|
422 |
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
for response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
|
427 |
yield response
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
response = ""
|
439 |
-
for i in range(num_calls):
|
440 |
-
logging.info(f"API call {i+1}/{num_calls}")
|
441 |
-
for message in client.chat_completion(
|
442 |
-
messages=messages,
|
443 |
-
max_tokens=20000,
|
444 |
-
temperature=temperature,
|
445 |
-
stream=True,
|
446 |
-
top_p=0.8,
|
447 |
-
):
|
448 |
-
if message.choices and message.choices[0].delta and message.choices[0].delta.content:
|
449 |
-
chunk = message.choices[0].delta.content
|
450 |
-
response += chunk
|
451 |
-
yield response # Yield partial response
|
452 |
-
|
453 |
-
logging.info("Finished generating response")
|
454 |
|
455 |
except Exception as e:
|
456 |
logging.error(f"Error with {model}: {str(e)}")
|
|
|
352 |
except Exception as e:
|
353 |
return f"An error occurred during summarization: {str(e)}"
|
354 |
|
355 |
+
def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2):
    """Stream an LLM answer about Excel-derived data.

    Generator: yields progressively longer partial responses so the UI can
    stream output.

    Args:
        query: The user's question about the spreadsheet data.
        model: Model identifier; routes to DuckDuckGo chat ("duckduckgo/" prefix),
            Cloudflare ("@cf/meta/llama-3.1-8b-instruct"), or a Hugging Face
            Inference endpoint (anything else).
        context: Text extracted from the Excel documents, injected into the prompt.
        num_calls: Number of sequential completion calls to make (HF path).
        temperature: Sampling temperature passed to the backend.

    Yields:
        str: The accumulated response text after each streamed chunk.
    """
    logging.info(f"Getting response from Excel using model: {model}")

    messages = [
        {"role": "system", "content": "You are a highly specialized data analyst with expertise in analyzing and summarizing Excel spreadsheets. Your goal is to provide accurate, detailed, and precise summaries based on the data provided. Focus on identifying key metrics, trends, and significant details relevant to the query. Avoid making assumptions or adding information that is not explicitly supported by the data."},
        {"role": "user", "content": f"Using the following data extracted from Excel spreadsheets:\n{context}\n\nPlease analyze this data and provide a comprehensive answer to the following question: '{query}'. Include relevant statistics, trends, and insights. If appropriate, suggest visualizations that could help illustrate the findings."}
    ]

    if model.startswith("duckduckgo/"):
        # BUG FIX: this function is a generator (it contains `yield` below), so a
        # plain `return <generator>` would end iteration immediately and the
        # caller's `for response in ...` loop would receive nothing. `yield from`
        # actually forwards the delegated generator's streamed items.
        yield from get_response_from_duckduckgo(query, model, context, num_calls, temperature)
    elif model == "@cf/meta/llama-3.1-8b-instruct":
        # Use Cloudflare API — same `yield from` fix as above.
        yield from get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="excel")
    else:
        # Use Hugging Face Inference API with streaming.
        client = InferenceClient(model, token=huggingface_token)

        response = ""
        for i in range(num_calls):
            logging.info(f"API call {i+1}/{num_calls}")
            for message in client.chat_completion(
                messages=messages,
                max_tokens=20000,
                temperature=temperature,
                stream=True,
                top_p=0.8,
            ):
                # Guard against keep-alive/empty delta events in the stream.
                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                    chunk = message.choices[0].delta.content
                    response += chunk
                    yield response  # Yield partial response

        logging.info("Finished generating response for Excel data")
390 |
# Modify the existing respond function to handle both PDF and web search
|
391 |
def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
|
392 |
logging.info(f"User Query: {message}")
|
|
|
416 |
yield final_summary
|
417 |
else:
|
418 |
yield "Unable to generate a response. Please try a different query."
|
419 |
+
|
420 |
else:
|
|
|
421 |
try:
|
422 |
embed = get_embeddings()
|
423 |
pdf_database = None
|
|
|
448 |
yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
|
449 |
return
|
450 |
|
451 |
+
# Separate Excel documents from others
|
452 |
+
excel_docs = [doc for doc in relevant_docs if doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
|
453 |
+
other_docs = [doc for doc in relevant_docs if not doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
|
454 |
+
|
455 |
+
excel_context = "\n".join([doc.page_content for doc in excel_docs])
|
456 |
+
other_context = "\n".join([doc.page_content for doc in other_docs])
|
457 |
|
458 |
+
logging.info(f"Excel context length: {len(excel_context)}")
|
459 |
+
logging.info(f"Other context length: {len(other_context)}")
|
|
|
460 |
|
461 |
+
# Process Excel documents
|
462 |
+
if excel_docs:
|
463 |
+
for response in get_response_from_excel(message, model, excel_context, num_calls, temperature):
|
|
|
464 |
yield response
|
465 |
+
|
466 |
+
# Process other documents (PDF, Word)
|
467 |
+
if other_docs:
|
468 |
+
if model == "@cf/meta/llama-3.1-8b-instruct":
|
469 |
+
for response in get_response_from_cloudflare(prompt="", context=other_context, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
|
470 |
+
yield response
|
471 |
+
else:
|
472 |
+
for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):
|
473 |
+
yield response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
|
475 |
except Exception as e:
|
476 |
logging.error(f"Error with {model}: {str(e)}")
|