Update app.py
app.py
CHANGED
@@ -352,38 +352,60 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
     except Exception as e:
         return f"An error occurred during summarization: {str(e)}"
 
-def get_response_from_excel(query, model, context, num_calls=
+def get_response_from_excel(query, model, context, num_calls=1, temperature=0.2):
     logging.info(f"Getting response from Excel using model: {model}")
-
+
+    # Use embeddings to find the most relevant Excel data
+    embed = get_embeddings()
+    office_database = FAISS.load_local("office_faiss_database", embed, allow_dangerous_deserialization=True)
+    retriever = office_database.as_retriever(search_kwargs={"k": 5})
+    relevant_docs = retriever.get_relevant_documents(query)
+
+    # Prepare the context from relevant documents
+    excel_context = "\n".join([doc.page_content for doc in relevant_docs])
+
+    # Prepare the messages for the AI model
     messages = [
-        {"role": "system", "content": "You are
-        {"role": "user", "content": f"
+        {"role": "system", "content": "You are an AI assistant specialized in analyzing Excel data. Your task is to provide accurate and detailed responses based solely on the given Excel data context. Do not make assumptions or add information beyond what is explicitly provided in the context."},
+        {"role": "user", "content": f"Based on the following Excel data:\n\n{excel_context}\n\nPlease answer this question: {query}\n\nProvide a step-by-step analysis if applicable, and ensure your response is factual and directly related to the provided Excel data."}
     ]
 
+    full_response = ""
     if model.startswith("duckduckgo/"):
-
-
+        for _ in range(num_calls):
+            try:
+                ddg_model = model.split('/')[-1]
+                results = DDGS().chat(messages[-1]["content"], model=ddg_model)
+                full_response += results + "\n"
+                logging.info(f"DuckDuckGo API response received for Excel query. Length: {len(results)}")
+            except Exception as e:
+                logging.error(f"Error in generating response from DuckDuckGo for Excel: {str(e)}")
+                yield f"An error occurred with the {model} model: {str(e)}. Please try again."
+                return
     elif model == "@cf/meta/llama-3.1-8b-instruct":
-
-
+        for response in get_response_from_cloudflare(prompt="", context=excel_context, query=query, num_calls=num_calls, temperature=temperature, search_type="excel"):
+            yield response
+        return
     else:
-        # Use Hugging Face API
         client = InferenceClient(model, token=huggingface_token)
-
-        response = ""
         for i in range(num_calls):
-            logging.info(f"API call {i+1}/{num_calls}")
+            logging.info(f"API call {i+1}/{num_calls} for Excel query")
             for message in client.chat_completion(
                 messages=messages,
-                max_tokens=
+                max_tokens=1000,
                 temperature=temperature,
                 stream=True,
                 top_p=0.8,
             ):
                 if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                     chunk = message.choices[0].delta.content
-
-                    yield
+                    full_response += chunk
+                    yield full_response
+
+    if not full_response.strip():
+        yield "I couldn't generate a response based on the Excel data. Please try rephrasing your question or check if the relevant data is present in the uploaded Excel files."
+    else:
+        yield full_response.strip()
 
     logging.info("Finished generating response for Excel data")
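Two review notes on the new get_response_from_excel: the context parameter is now effectively unused (the function retrieves its own context from the office_faiss_database FAISS index, and the caller in respond passes ""), and the Hugging Face branch re-yields the growing full_response after each streamed chunk, so a consumer should keep only the last value it receives. A minimal consumption sketch, assuming app.py is importable; the query string and model id below are hypothetical:

# Sketch only -- not part of this commit. Assumes app.py is on the import path.
from app import get_response_from_excel

final_answer = ""
# Each yield carries the full accumulated text so far, so the last
# value received is the complete answer.
for partial in get_response_from_excel(
    query="What was total Q3 revenue?",          # hypothetical query
    model="mistralai/Mistral-7B-Instruct-v0.2",  # hypothetical HF model id
    context="",                                  # ignored: retrieval happens inside
    num_calls=1,
    temperature=0.2,
):
    final_answer = partial
print(final_answer)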
@@ -418,7 +440,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
         yield "Unable to generate a response. Please try a different query."
 
     else:
-
+        try:
             embed = get_embeddings()
             pdf_database = None
             office_database = None
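The try: added here has no matching except within the visible hunks, so the handler presumably sits further down in respond, outside this diff. A plausible shape of the wrapper, with the except body assumed rather than taken from the commit:

else:
    try:
        embed = get_embeddings()
        pdf_database = None
        office_database = None
        # ... database loading and routing (see the next hunk) ...
    except Exception as e:                        # assumed handler; not shown in this diff
        logging.error(f"Error in respond: {e}")   # assumed
        yield f"An error occurred: {str(e)}"      # assumed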
@@ -433,40 +455,34 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
                 yield "No documents available. Please upload documents to answer questions."
                 return
 
-
-            if pdf_database:
-                pdf_retriever = pdf_database.as_retriever(search_kwargs={"k": 10})
-                all_relevant_docs.extend(pdf_retriever.get_relevant_documents(message))
 
-            if office_database:
-
-
-
-            relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
-
-            if not relevant_docs:
-                yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
-                return
-
-            # Separate Excel documents from others
-            excel_docs = [doc for doc in relevant_docs if doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
-            other_docs = [doc for doc in relevant_docs if not doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
-
-            excel_context = "\n".join([doc.page_content for doc in excel_docs])
-            other_context = "\n".join([doc.page_content for doc in other_docs])
-
-            logging.info(f"Excel context length: {len(excel_context)}")
-            logging.info(f"Other context length: {len(other_context)}")
-
-            # Process Excel documents
-            if excel_docs:
-                for response in get_response_from_excel(message, model, excel_context, num_calls, temperature):
+            # Determine if the query is specifically for Excel data
+            excel_files = [doc for doc in selected_docs if doc.lower().endswith(('.xlsx', '.xls'))]
+
+            if excel_files:
+                # If Excel files are selected, use the Excel-specific function
+                for response in get_response_from_excel(message, model, "", num_calls, temperature):
                     yield response
-
-
-
+            else:
+                # Existing logic for PDF and other document types
+                all_relevant_docs = []
+                if pdf_database:
+                    pdf_retriever = pdf_database.as_retriever(search_kwargs={"k": 10})
+                    all_relevant_docs.extend(pdf_retriever.get_relevant_documents(message))
+
+                if office_database:
+                    office_retriever = office_database.as_retriever(search_kwargs={"k": 10})
+                    all_relevant_docs.extend(office_retriever.get_relevant_documents(message))
+
+                relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
+
+                if not relevant_docs:
+                    yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
+                    return
+
+                context_str = "\n".join([doc.page_content for doc in relevant_docs])
+
                 if model == "@cf/meta/llama-3.1-8b-instruct":
-                    for response in get_response_from_cloudflare(prompt="", context=
+                    for response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
                         yield response
                 else:
                     for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):
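One behavioral consequence of the new routing is worth flagging: the check inspects the selected filenames, not the retrieved documents, so any Excel selection sends the entire query down the Excel path and PDF retrieval is skipped even for mixed selections. A small illustration of the condition (filenames are hypothetical):

# Illustration only -- mirrors the routing condition added in this commit.
selected_docs = ["annual_report.pdf", "sales.xlsx"]  # hypothetical selection
excel_files = [doc for doc in selected_docs if doc.lower().endswith(('.xlsx', '.xls'))]
print(excel_files)  # ['sales.xlsx'] -> truthy, so respond() calls
                    # get_response_from_excel and never queries the PDF index.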