Shreyas094 committed on
Commit
ac8a581
·
verified ·
1 Parent(s): 5554476

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -47
app.py CHANGED
@@ -352,38 +352,60 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
352
  except Exception as e:
353
  return f"An error occurred during summarization: {str(e)}"
354
 
355
- def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2):
356
  logging.info(f"Getting response from Excel using model: {model}")
357
-
 
 
 
 
 
 
 
 
 
 
358
  messages = [
359
- {"role": "system", "content": "You are a highly specialized data analyst with expertise in analyzing and summarizing Excel spreadsheets. Your goal is to provide accurate, detailed, and precise information on the data provided. Focus on identifying key metrics, trends, and significant details relevant to the query. Avoid making assumptions or adding information that is not explicitly supported by the data."},
360
- {"role": "user", "content": f"Using the following data extracted from Excel spreadsheets:\n{context}\n\nPlease analyze this data and provide a comprehensive answer to the following question: '{query}'. If appropriate, suggest visualizations that could help illustrate the findings."}
361
  ]
362
 
 
363
  if model.startswith("duckduckgo/"):
364
- # Use DuckDuckGo chat with context
365
- return get_response_from_duckduckgo(query, model, context, num_calls, temperature)
 
 
 
 
 
 
 
 
366
  elif model == "@cf/meta/llama-3.1-8b-instruct":
367
- # Use Cloudflare API
368
- return get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="excel")
 
369
  else:
370
- # Use Hugging Face API
371
  client = InferenceClient(model, token=huggingface_token)
372
-
373
- response = ""
374
  for i in range(num_calls):
375
- logging.info(f"API call {i+1}/{num_calls}")
376
  for message in client.chat_completion(
377
  messages=messages,
378
- max_tokens=20000,
379
  temperature=temperature,
380
  stream=True,
381
  top_p=0.8,
382
  ):
383
  if message.choices and message.choices[0].delta and message.choices[0].delta.content:
384
  chunk = message.choices[0].delta.content
385
- response += chunk
386
- yield response # Yield partial response
 
 
 
 
 
387
 
388
  logging.info("Finished generating response for Excel data")
389
 
@@ -418,7 +440,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
418
  yield "Unable to generate a response. Please try a different query."
419
 
420
  else:
421
- try:
422
  embed = get_embeddings()
423
  pdf_database = None
424
  office_database = None
@@ -433,40 +455,34 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
433
  yield "No documents available. Please upload documents to answer questions."
434
  return
435
 
436
- all_relevant_docs = []
437
- if pdf_database:
438
- pdf_retriever = pdf_database.as_retriever(search_kwargs={"k": 10})
439
- all_relevant_docs.extend(pdf_retriever.get_relevant_documents(message))
440
 
441
- if office_database:
442
- office_retriever = office_database.as_retriever(search_kwargs={"k": 10})
443
- all_relevant_docs.extend(office_retriever.get_relevant_documents(message))
444
-
445
- relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
446
-
447
- if not relevant_docs:
448
- yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
449
- return
450
-
451
- # Separate Excel documents from others
452
- excel_docs = [doc for doc in relevant_docs if doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
453
- other_docs = [doc for doc in relevant_docs if not doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
454
-
455
- excel_context = "\n".join([doc.page_content for doc in excel_docs])
456
- other_context = "\n".join([doc.page_content for doc in other_docs])
457
-
458
- logging.info(f"Excel context length: {len(excel_context)}")
459
- logging.info(f"Other context length: {len(other_context)}")
460
-
461
- # Process Excel documents
462
- if excel_docs:
463
- for response in get_response_from_excel(message, model, excel_context, num_calls, temperature):
464
  yield response
465
-
466
- # Process other documents (PDF, Word)
467
- if other_docs:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  if model == "@cf/meta/llama-3.1-8b-instruct":
469
- for response in get_response_from_cloudflare(prompt="", context=other_context, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
470
  yield response
471
  else:
472
  for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):
 
352
  except Exception as e:
353
  return f"An error occurred during summarization: {str(e)}"
354
 
355
+ def get_response_from_excel(query, model, context, num_calls=1, temperature=0.2):
356
  logging.info(f"Getting response from Excel using model: {model}")
357
+
358
+ # Use embeddings to find the most relevant Excel data
359
+ embed = get_embeddings()
360
+ office_database = FAISS.load_local("office_faiss_database", embed, allow_dangerous_deserialization=True)
361
+ retriever = office_database.as_retriever(search_kwargs={"k": 5})
362
+ relevant_docs = retriever.get_relevant_documents(query)
363
+
364
+ # Prepare the context from relevant documents
365
+ excel_context = "\n".join([doc.page_content for doc in relevant_docs])
366
+
367
+ # Prepare the messages for the AI model
368
  messages = [
369
+ {"role": "system", "content": "You are an AI assistant specialized in analyzing Excel data. Your task is to provide accurate and detailed responses based solely on the given Excel data context. Do not make assumptions or add information beyond what is explicitly provided in the context."},
370
+ {"role": "user", "content": f"Based on the following Excel data:\n\n{excel_context}\n\nPlease answer this question: {query}\n\nProvide a step-by-step analysis if applicable, and ensure your response is factual and directly related to the provided Excel data."}
371
  ]
372
 
373
+ full_response = ""
374
  if model.startswith("duckduckgo/"):
375
+ for _ in range(num_calls):
376
+ try:
377
+ ddg_model = model.split('/')[-1]
378
+ results = DDGS().chat(messages[-1]["content"], model=ddg_model)
379
+ full_response += results + "\n"
380
+ logging.info(f"DuckDuckGo API response received for Excel query. Length: {len(results)}")
381
+ except Exception as e:
382
+ logging.error(f"Error in generating response from DuckDuckGo for Excel: {str(e)}")
383
+ yield f"An error occurred with the {model} model: {str(e)}. Please try again."
384
+ return
385
  elif model == "@cf/meta/llama-3.1-8b-instruct":
386
+ for response in get_response_from_cloudflare(prompt="", context=excel_context, query=query, num_calls=num_calls, temperature=temperature, search_type="excel"):
387
+ yield response
388
+ return
389
  else:
 
390
  client = InferenceClient(model, token=huggingface_token)
 
 
391
  for i in range(num_calls):
392
+ logging.info(f"API call {i+1}/{num_calls} for Excel query")
393
  for message in client.chat_completion(
394
  messages=messages,
395
+ max_tokens=1000,
396
  temperature=temperature,
397
  stream=True,
398
  top_p=0.8,
399
  ):
400
  if message.choices and message.choices[0].delta and message.choices[0].delta.content:
401
  chunk = message.choices[0].delta.content
402
+ full_response += chunk
403
+ yield full_response
404
+
405
+ if not full_response.strip():
406
+ yield "I couldn't generate a response based on the Excel data. Please try rephrasing your question or check if the relevant data is present in the uploaded Excel files."
407
+ else:
408
+ yield full_response.strip()
409
 
410
  logging.info("Finished generating response for Excel data")
411
 
 
440
  yield "Unable to generate a response. Please try a different query."
441
 
442
  else:
443
+ try:
444
  embed = get_embeddings()
445
  pdf_database = None
446
  office_database = None
 
455
  yield "No documents available. Please upload documents to answer questions."
456
  return
457
 
458
+ # Determine if the query is specifically for Excel data
459
+ excel_files = [doc for doc in selected_docs if doc.lower().endswith(('.xlsx', '.xls'))]
 
 
460
 
461
+ if excel_files:
462
+ # If Excel files are selected, use the Excel-specific function
463
+ for response in get_response_from_excel(message, model, "", num_calls, temperature):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  yield response
465
+ else:
466
+ # Existing logic for PDF and other document types
467
+ all_relevant_docs = []
468
+ if pdf_database:
469
+ pdf_retriever = pdf_database.as_retriever(search_kwargs={"k": 10})
470
+ all_relevant_docs.extend(pdf_retriever.get_relevant_documents(message))
471
+
472
+ if office_database:
473
+ office_retriever = office_database.as_retriever(search_kwargs={"k": 10})
474
+ all_relevant_docs.extend(office_retriever.get_relevant_documents(message))
475
+
476
+ relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
477
+
478
+ if not relevant_docs:
479
+ yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
480
+ return
481
+
482
+ context_str = "\n".join([doc.page_content for doc in relevant_docs])
483
+
484
  if model == "@cf/meta/llama-3.1-8b-instruct":
485
+ for response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
486
  yield response
487
  else:
488
  for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):