Shreyas094 committed on
Commit
05627b5
·
verified ·
1 Parent(s): 448313a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -63
app.py CHANGED
@@ -352,60 +352,38 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
352
  except Exception as e:
353
  return f"An error occurred during summarization: {str(e)}"
354
 
355
- def get_response_from_excel(query, model, context, num_calls=1, temperature=0.2):
356
  logging.info(f"Getting response from Excel using model: {model}")
357
-
358
- # Use embeddings to find the most relevant Excel data
359
- embed = get_embeddings()
360
- office_database = FAISS.load_local("office_faiss_database", embed, allow_dangerous_deserialization=True)
361
- retriever = office_database.as_retriever(search_kwargs={"k": 5})
362
- relevant_docs = retriever.get_relevant_documents(query)
363
-
364
- # Prepare the context from relevant documents
365
- excel_context = "\n".join([doc.page_content for doc in relevant_docs])
366
-
367
- # Prepare the messages for the AI model
368
  messages = [
369
- {"role": "system", "content": "You are an AI assistant specialized in analyzing Excel data. Your task is to provide accurate and detailed responses based solely on the given Excel data context. Do not make assumptions or add information beyond what is explicitly provided in the context."},
370
- {"role": "user", "content": f"Based on the following Excel data:\n\n{excel_context}\n\nPlease answer this question: {query}\n\nProvide a step-by-step analysis if applicable, and ensure your response is factual and directly related to the provided Excel data."}
371
  ]
372
 
373
- full_response = ""
374
  if model.startswith("duckduckgo/"):
375
- for _ in range(num_calls):
376
- try:
377
- ddg_model = model.split('/')[-1]
378
- results = DDGS().chat(messages[-1]["content"], model=ddg_model)
379
- full_response += results + "\n"
380
- logging.info(f"DuckDuckGo API response received for Excel query. Length: {len(results)}")
381
- except Exception as e:
382
- logging.error(f"Error in generating response from DuckDuckGo for Excel: {str(e)}")
383
- yield f"An error occurred with the {model} model: {str(e)}. Please try again."
384
- return
385
  elif model == "@cf/meta/llama-3.1-8b-instruct":
386
- for response in get_response_from_cloudflare(prompt="", context=excel_context, query=query, num_calls=num_calls, temperature=temperature, search_type="excel"):
387
- yield response
388
- return
389
  else:
 
390
  client = InferenceClient(model, token=huggingface_token)
 
 
391
  for i in range(num_calls):
392
- logging.info(f"API call {i+1}/{num_calls} for Excel query")
393
  for message in client.chat_completion(
394
  messages=messages,
395
- max_tokens=1000,
396
  temperature=temperature,
397
  stream=True,
398
  top_p=0.8,
399
  ):
400
  if message.choices and message.choices[0].delta and message.choices[0].delta.content:
401
  chunk = message.choices[0].delta.content
402
- full_response += chunk
403
- yield full_response
404
-
405
- if not full_response.strip():
406
- yield "I couldn't generate a response based on the Excel data. Please try rephrasing your question or check if the relevant data is present in the uploaded Excel files."
407
- else:
408
- yield full_response.strip()
409
 
410
  logging.info("Finished generating response for Excel data")
411
 
@@ -455,38 +433,45 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
455
  yield "No documents available. Please upload documents to answer questions."
456
  return
457
 
458
- # Determine if the query is specifically for Excel data
459
- excel_files = [doc for doc in selected_docs if doc.lower().endswith(('.xlsx', '.xls'))]
 
 
460
 
461
- if excel_files:
462
- # If Excel files are selected, use the Excel-specific function
463
- for response in get_response_from_excel(message, model, "", num_calls, temperature):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  yield response
465
- else:
466
- # Existing logic for PDF and other document types
467
- all_relevant_docs = []
468
- if pdf_database:
469
- pdf_retriever = pdf_database.as_retriever(search_kwargs={"k": 10})
470
- all_relevant_docs.extend(pdf_retriever.get_relevant_documents(message))
471
-
472
- if office_database:
473
- office_retriever = office_database.as_retriever(search_kwargs={"k": 10})
474
- all_relevant_docs.extend(office_retriever.get_relevant_documents(message))
475
-
476
- relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
477
-
478
- if not relevant_docs:
479
- yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
480
- return
481
-
482
- context_str = "\n".join([doc.page_content for doc in relevant_docs])
483
-
484
  if model == "@cf/meta/llama-3.1-8b-instruct":
485
- for response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
486
  yield response
487
  else:
488
  for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):
489
  yield response
 
490
  except Exception as e:
491
  logging.error(f"Error with {model}: {str(e)}")
492
  if "microsoft/Phi-3-mini-4k-instruct" in model:
@@ -495,7 +480,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
495
  yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
496
  else:
497
  yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
498
-
499
  logging.basicConfig(level=logging.DEBUG)
500
 
501
  def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):
 
352
  except Exception as e:
353
  return f"An error occurred during summarization: {str(e)}"
354
 
355
def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2):
    """Stream an answer to ``query`` grounded in Excel-derived ``context``.

    This function is a generator. The backend is chosen from ``model``:

    * names starting with ``"duckduckgo/"`` are delegated to
      ``get_response_from_duckduckgo``;
    * ``"@cf/meta/llama-3.1-8b-instruct"`` is delegated to
      ``get_response_from_cloudflare`` with ``search_type="excel"``;
    * anything else is sent to a Hugging Face ``InferenceClient`` using a
      streamed chat completion.

    Args:
        query: The user's question.
        model: Model identifier used to pick the backend.
        context: Text extracted from the Excel documents; it is embedded
            verbatim in the user prompt for the Hugging Face backend and
            passed through to the other backends.
        num_calls: Number of times the Hugging Face completion is requested
            (also forwarded to the delegated backends).
        temperature: Sampling temperature forwarded to the backend.

    Yields:
        For the Hugging Face backend, the accumulated response text after
        each streamed chunk (the text keeps growing across all ``num_calls``
        rounds). For the delegated backends, whatever those helpers produce.
    """
    logging.info(f"Getting response from Excel using model: {model}")

    if model.startswith("duckduckgo/"):
        # Because this function contains ``yield`` it is a generator, so a
        # plain ``return helper(...)`` would discard the helper's output and
        # the caller's ``for`` loop would receive nothing. Delegate instead.
        delegated = get_response_from_duckduckgo(query, model, context, num_calls, temperature)
        # NOTE(review): the helper is not visible here; it is presumably a
        # generator of strings. A bare string is tolerated so that it is not
        # re-yielded one character at a time -- confirm against the helper.
        if isinstance(delegated, str):
            yield delegated
        else:
            yield from delegated
    elif model == "@cf/meta/llama-3.1-8b-instruct":
        # Same generator-delegation reasoning as above.
        yield from get_response_from_cloudflare(
            prompt="",
            context=context,
            query=query,
            num_calls=num_calls,
            temperature=temperature,
            search_type="excel",
        )
    else:
        # Use Hugging Face API. The chat messages are only needed by this
        # backend; the delegated helpers receive query/context directly.
        messages = [
            {"role": "system", "content": "You are a highly specialized data analyst with expertise in analyzing and summarizing Excel spreadsheets. Your goal is to provide accurate, detailed, and precise information on the data provided. Focus on identifying key metrics, trends, and significant details relevant to the query. Avoid making assumptions or adding information that is not explicitly supported by the data."},
            {"role": "user", "content": f"Using the following data extracted from Excel spreadsheets:\n{context}\n\nPlease analyze this data and provide a comprehensive answer to the following question: '{query}'. If appropriate, suggest visualizations that could help illustrate the findings."}
        ]
        client = InferenceClient(model, token=huggingface_token)

        response = ""
        for i in range(num_calls):
            logging.info(f"API call {i+1}/{num_calls}")
            for message in client.chat_completion(
                messages=messages,
                max_tokens=20000,
                temperature=temperature,
                stream=True,
                top_p=0.8,
            ):
                # Skip keep-alive/empty stream events that carry no text.
                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                    chunk = message.choices[0].delta.content
                    response += chunk
                    yield response  # Yield partial response

    logging.info("Finished generating response for Excel data")
389
 
 
433
  yield "No documents available. Please upload documents to answer questions."
434
  return
435
 
436
+ all_relevant_docs = []
437
+ if pdf_database:
438
+ pdf_retriever = pdf_database.as_retriever(search_kwargs={"k": 10})
439
+ all_relevant_docs.extend(pdf_retriever.get_relevant_documents(message))
440
 
441
+ if office_database:
442
+ office_retriever = office_database.as_retriever(search_kwargs={"k": 10})
443
+ all_relevant_docs.extend(office_retriever.get_relevant_documents(message))
444
+
445
+ relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
446
+
447
+ if not relevant_docs:
448
+ yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
449
+ return
450
+
451
+ # Separate Excel documents from others
452
+ excel_docs = [doc for doc in relevant_docs if doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
453
+ other_docs = [doc for doc in relevant_docs if not doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
454
+
455
+ excel_context = "\n".join([doc.page_content for doc in excel_docs])
456
+ other_context = "\n".join([doc.page_content for doc in other_docs])
457
+
458
+ logging.info(f"Excel context length: {len(excel_context)}")
459
+ logging.info(f"Other context length: {len(other_context)}")
460
+
461
+ # Process Excel documents
462
+ if excel_docs:
463
+ for response in get_response_from_excel(message, model, excel_context, num_calls, temperature):
464
  yield response
465
+
466
+ # Process other documents (PDF, Word)
467
+ if other_docs:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  if model == "@cf/meta/llama-3.1-8b-instruct":
469
+ for response in get_response_from_cloudflare(prompt="", context=other_context, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
470
  yield response
471
  else:
472
  for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):
473
  yield response
474
+
475
  except Exception as e:
476
  logging.error(f"Error with {model}: {str(e)}")
477
  if "microsoft/Phi-3-mini-4k-instruct" in model:
 
480
  yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
481
  else:
482
  yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
483
+
484
  logging.basicConfig(level=logging.DEBUG)
485
 
486
  def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):