Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -352,60 +352,38 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
|
|
352 |
except Exception as e:
|
353 |
return f"An error occurred during summarization: {str(e)}"
|
354 |
|
355 |
-
def get_response_from_excel(query, model, context, num_calls=
|
356 |
logging.info(f"Getting response from Excel using model: {model}")
|
357 |
-
|
358 |
-
# Use embeddings to find the most relevant Excel data
|
359 |
-
embed = get_embeddings()
|
360 |
-
office_database = FAISS.load_local("office_faiss_database", embed, allow_dangerous_deserialization=True)
|
361 |
-
retriever = office_database.as_retriever(search_kwargs={"k": 5})
|
362 |
-
relevant_docs = retriever.get_relevant_documents(query)
|
363 |
-
|
364 |
-
# Prepare the context from relevant documents
|
365 |
-
excel_context = "\n".join([doc.page_content for doc in relevant_docs])
|
366 |
-
|
367 |
-
# Prepare the messages for the AI model
|
368 |
messages = [
|
369 |
-
{"role": "system", "content": "You are
|
370 |
-
{"role": "user", "content": f"
|
371 |
]
|
372 |
|
373 |
-
full_response = ""
|
374 |
if model.startswith("duckduckgo/"):
|
375 |
-
|
376 |
-
|
377 |
-
ddg_model = model.split('/')[-1]
|
378 |
-
results = DDGS().chat(messages[-1]["content"], model=ddg_model)
|
379 |
-
full_response += results + "\n"
|
380 |
-
logging.info(f"DuckDuckGo API response received for Excel query. Length: {len(results)}")
|
381 |
-
except Exception as e:
|
382 |
-
logging.error(f"Error in generating response from DuckDuckGo for Excel: {str(e)}")
|
383 |
-
yield f"An error occurred with the {model} model: {str(e)}. Please try again."
|
384 |
-
return
|
385 |
elif model == "@cf/meta/llama-3.1-8b-instruct":
|
386 |
-
|
387 |
-
|
388 |
-
return
|
389 |
else:
|
|
|
390 |
client = InferenceClient(model, token=huggingface_token)
|
|
|
|
|
391 |
for i in range(num_calls):
|
392 |
-
logging.info(f"API call {i+1}/{num_calls}
|
393 |
for message in client.chat_completion(
|
394 |
messages=messages,
|
395 |
-
max_tokens=
|
396 |
temperature=temperature,
|
397 |
stream=True,
|
398 |
top_p=0.8,
|
399 |
):
|
400 |
if message.choices and message.choices[0].delta and message.choices[0].delta.content:
|
401 |
chunk = message.choices[0].delta.content
|
402 |
-
|
403 |
-
yield
|
404 |
-
|
405 |
-
if not full_response.strip():
|
406 |
-
yield "I couldn't generate a response based on the Excel data. Please try rephrasing your question or check if the relevant data is present in the uploaded Excel files."
|
407 |
-
else:
|
408 |
-
yield full_response.strip()
|
409 |
|
410 |
logging.info("Finished generating response for Excel data")
|
411 |
|
@@ -455,38 +433,45 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
|
|
455 |
yield "No documents available. Please upload documents to answer questions."
|
456 |
return
|
457 |
|
458 |
-
|
459 |
-
|
|
|
|
|
460 |
|
461 |
-
if
|
462 |
-
|
463 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
464 |
yield response
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
if pdf_database:
|
469 |
-
pdf_retriever = pdf_database.as_retriever(search_kwargs={"k": 10})
|
470 |
-
all_relevant_docs.extend(pdf_retriever.get_relevant_documents(message))
|
471 |
-
|
472 |
-
if office_database:
|
473 |
-
office_retriever = office_database.as_retriever(search_kwargs={"k": 10})
|
474 |
-
all_relevant_docs.extend(office_retriever.get_relevant_documents(message))
|
475 |
-
|
476 |
-
relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
|
477 |
-
|
478 |
-
if not relevant_docs:
|
479 |
-
yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
|
480 |
-
return
|
481 |
-
|
482 |
-
context_str = "\n".join([doc.page_content for doc in relevant_docs])
|
483 |
-
|
484 |
if model == "@cf/meta/llama-3.1-8b-instruct":
|
485 |
-
for response in get_response_from_cloudflare(prompt="", context=
|
486 |
yield response
|
487 |
else:
|
488 |
for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):
|
489 |
yield response
|
|
|
490 |
except Exception as e:
|
491 |
logging.error(f"Error with {model}: {str(e)}")
|
492 |
if "microsoft/Phi-3-mini-4k-instruct" in model:
|
@@ -495,7 +480,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
|
|
495 |
yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
|
496 |
else:
|
497 |
yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
|
498 |
-
|
499 |
logging.basicConfig(level=logging.DEBUG)
|
500 |
|
501 |
def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):
|
|
|
352 |
except Exception as e:
|
353 |
return f"An error occurred during summarization: {str(e)}"
|
354 |
|
355 |
+
def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2):
    """Stream an answer to ``query`` grounded in Excel-derived ``context``.

    This function is a generator: callers are expected to iterate over it
    (e.g. ``for response in get_response_from_excel(...)``).

    The backend is selected from ``model``:

    * ``"duckduckgo/..."`` -> ``get_response_from_duckduckgo``
    * ``"@cf/meta/llama-3.1-8b-instruct"`` -> ``get_response_from_cloudflare``
    * anything else -> Hugging Face ``InferenceClient`` streaming chat

    Args:
        query: The user's question.
        model: Model identifier used to choose the backend.
        context: Text extracted from the Excel documents, embedded in the
            user prompt.
        num_calls: Number of chat-completion rounds to run (Hugging Face
            branch) or forwarded to the delegated backend.
        temperature: Sampling temperature forwarded to the backend.

    Yields:
        For the Hugging Face branch, the accumulated response text after each
        streamed chunk (the text keeps growing across all ``num_calls``
        rounds). For the other branches, whatever the delegated helper
        produces.
    """
    logging.info(f"Getting response from Excel using model: {model}")

    messages = [
        {"role": "system", "content": "You are a highly specialized data analyst with expertise in analyzing and summarizing Excel spreadsheets. Your goal is to provide accurate, detailed, and precise information on the data provided. Focus on identifying key metrics, trends, and significant details relevant to the query. Avoid making assumptions or adding information that is not explicitly supported by the data."},
        {"role": "user", "content": f"Using the following data extracted from Excel spreadsheets:\n{context}\n\nPlease analyze this data and provide a comprehensive answer to the following question: '{query}'. If appropriate, suggest visualizations that could help illustrate the findings."}
    ]

    # NOTE: because this function contains ``yield`` it is a generator, so a
    # plain ``return helper(...)`` would only set StopIteration.value and the
    # caller's ``for`` loop would receive nothing. Delegate with ``yield from``
    # so the helper's output actually reaches the caller.
    if model.startswith("duckduckgo/"):
        # Use DuckDuckGo chat with context
        delegated = get_response_from_duckduckgo(query, model, context, num_calls, temperature)
        # NOTE(review): the helper is defined elsewhere; guard against it
        # returning a plain string so we never iterate character by character.
        if isinstance(delegated, str):
            yield delegated
        elif delegated is not None:
            yield from delegated
    elif model == "@cf/meta/llama-3.1-8b-instruct":
        # Use Cloudflare API
        delegated = get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="excel")
        if isinstance(delegated, str):
            yield delegated
        elif delegated is not None:
            yield from delegated
    else:
        # Use Hugging Face API
        client = InferenceClient(model, token=huggingface_token)

        response = ""
        for i in range(num_calls):
            logging.info(f"API call {i+1}/{num_calls}")
            for message in client.chat_completion(
                messages=messages,
                max_tokens=20000,
                temperature=temperature,
                stream=True,
                top_p=0.8,
            ):
                # Skip stream events that carry no text delta.
                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                    chunk = message.choices[0].delta.content
                    response += chunk
                    yield response  # Yield partial response

    logging.info("Finished generating response for Excel data")
|
389 |
|
|
|
433 |
yield "No documents available. Please upload documents to answer questions."
|
434 |
return
|
435 |
|
436 |
+
all_relevant_docs = []
|
437 |
+
if pdf_database:
|
438 |
+
pdf_retriever = pdf_database.as_retriever(search_kwargs={"k": 10})
|
439 |
+
all_relevant_docs.extend(pdf_retriever.get_relevant_documents(message))
|
440 |
|
441 |
+
if office_database:
|
442 |
+
office_retriever = office_database.as_retriever(search_kwargs={"k": 10})
|
443 |
+
all_relevant_docs.extend(office_retriever.get_relevant_documents(message))
|
444 |
+
|
445 |
+
relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
|
446 |
+
|
447 |
+
if not relevant_docs:
|
448 |
+
yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
|
449 |
+
return
|
450 |
+
|
451 |
+
# Separate Excel documents from others
|
452 |
+
excel_docs = [doc for doc in relevant_docs if doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
|
453 |
+
other_docs = [doc for doc in relevant_docs if not doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
|
454 |
+
|
455 |
+
excel_context = "\n".join([doc.page_content for doc in excel_docs])
|
456 |
+
other_context = "\n".join([doc.page_content for doc in other_docs])
|
457 |
+
|
458 |
+
logging.info(f"Excel context length: {len(excel_context)}")
|
459 |
+
logging.info(f"Other context length: {len(other_context)}")
|
460 |
+
|
461 |
+
# Process Excel documents
|
462 |
+
if excel_docs:
|
463 |
+
for response in get_response_from_excel(message, model, excel_context, num_calls, temperature):
|
464 |
yield response
|
465 |
+
|
466 |
+
# Process other documents (PDF, Word)
|
467 |
+
if other_docs:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
468 |
if model == "@cf/meta/llama-3.1-8b-instruct":
|
469 |
+
for response in get_response_from_cloudflare(prompt="", context=other_context, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
|
470 |
yield response
|
471 |
else:
|
472 |
for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):
|
473 |
yield response
|
474 |
+
|
475 |
except Exception as e:
|
476 |
logging.error(f"Error with {model}: {str(e)}")
|
477 |
if "microsoft/Phi-3-mini-4k-instruct" in model:
|
|
|
480 |
yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
|
481 |
else:
|
482 |
yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
|
483 |
+
|
484 |
logging.basicConfig(level=logging.DEBUG)
|
485 |
|
486 |
def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):
|