Shreyas094 committed (verified)
Commit e3b0733 · Parent(s): cca553c

Update app.py

Files changed (1):
  1. app.py +56 -36
app.py CHANGED
@@ -352,6 +352,41 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
     except Exception as e:
         return f"An error occurred during summarization: {str(e)}"
 
+def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2):
+    logging.info(f"Getting response from Excel using model: {model}")
+
+    messages = [
+        {"role": "system", "content": "You are a highly specialized data analyst with expertise in analyzing and summarizing Excel spreadsheets. Your goal is to provide accurate, detailed, and precise summaries based on the data provided. Focus on identifying key metrics, trends, and significant details relevant to the query. Avoid making assumptions or adding information that is not explicitly supported by the data."},
+        {"role": "user", "content": f"Using the following data extracted from Excel spreadsheets:\n{context}\n\nPlease analyze this data and provide a comprehensive answer to the following question: '{query}'. Include relevant statistics, trends, and insights. If appropriate, suggest visualizations that could help illustrate the findings."}
+    ]
+
+    if model.startswith("duckduckgo/"):
+        # Use DuckDuckGo chat with context
+        return get_response_from_duckduckgo(query, model, context, num_calls, temperature)
+    elif model == "@cf/meta/llama-3.1-8b-instruct":
+        # Use Cloudflare API
+        return get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="excel")
+    else:
+        # Use Hugging Face API
+        client = InferenceClient(model, token=huggingface_token)
+
+        response = ""
+        for i in range(num_calls):
+            logging.info(f"API call {i+1}/{num_calls}")
+            for message in client.chat_completion(
+                messages=messages,
+                max_tokens=20000,
+                temperature=temperature,
+                stream=True,
+                top_p=0.8,
+            ):
+                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                    chunk = message.choices[0].delta.content
+                    response += chunk
+                    yield response  # Yield partial response
+
+        logging.info("Finished generating response for Excel data")
+
 # Modify the existing respond function to handle both PDF and web search
 def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
     logging.info(f"User Query: {message}")
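One subtlety in the new get_response_from_excel helper is worth flagging before the next hunk: because the function body contains yield, Python compiles the whole function as a generator, so the `return get_response_from_duckduckgo(...)` and `return get_response_from_cloudflare(...)` branches merely set the generator's StopIteration value, and a caller that iterates the helper (as respond does in the hunks below) receives no chunks from those two paths. A minimal, self-contained sketch of the behaviour, using hypothetical stand-ins rather than the app's real helpers:

# Hypothetical stand-ins, not the app's helpers: they illustrate why
# `return <generator>` inside a generator drops the stream, while
# `yield from` delegates it to the caller.
def upstream():
    yield "partial"
    yield "partial response"

def broken(delegate):
    if delegate:
        return upstream()  # whole function is a generator: this just ends iteration
    yield "local"

def fixed(delegate):
    if delegate:
        yield from upstream()  # chunks flow through to the caller
        return
    yield "local"

print(list(broken(True)))  # []
print(list(fixed(True)))   # ['partial', 'partial response']

Two smaller points, both visible in the diff itself: the Hugging Face branch accumulates into response and yields the running total rather than each chunk, so consumers always see the full text so far (convenient when a UI re-renders the message on every update), and because the helper's user question arrives as query, the `for message in client.chat_completion(...)` loop no longer shadows a meaningful name, unlike the removed block in respond, where it shadowed the message argument.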
@@ -381,8 +416,8 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
             yield final_summary
         else:
             yield "Unable to generate a response. Please try a different query."
+
     else:
-        # PDF and Office documents search logic
         try:
            embed = get_embeddings()
            pdf_database = None
@@ -413,44 +448,29 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
                yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
                return
 
-           context_str = "\n".join([doc.page_content for doc in relevant_docs])
-           logging.info(f"Total context length: {len(context_str)}")
+           # Separate Excel documents from others
+           excel_docs = [doc for doc in relevant_docs if doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
+           other_docs = [doc for doc in relevant_docs if not doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
+
+           excel_context = "\n".join([doc.page_content for doc in excel_docs])
+           other_context = "\n".join([doc.page_content for doc in other_docs])
 
-           for doc in relevant_docs:
-               logging.info(f"Document source: {doc.metadata['source']}")
-               logging.info(f"Document content preview: {doc.page_content[:100]}...")  # Log first 100 characters of each document
+           logging.info(f"Excel context length: {len(excel_context)}")
+           logging.info(f"Other context length: {len(other_context)}")
 
-           if model == "@cf/meta/llama-3.1-8b-instruct":
-               logging.info("Using Cloudflare API")
-               # Use Cloudflare API with the retrieved context
-               for response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
+           # Process Excel documents
+           if excel_docs:
+               for response in get_response_from_excel(message, model, excel_context, num_calls, temperature):
                    yield response
-           else:
-               logging.info("Using Hugging Face API")
-               # Use Hugging Face API
-               messages = [
-                   {"role": "system", "content": "You are a highly specialized assistant with expertise in analyzing and summarizing various types of documents including PDFs, Word documents, and Excel spreadsheets. Your goal is to provide accurate, detailed, and precise summaries based on the context provided. Avoid making assumptions or adding information that is not explicitly supported by the context from the documents."},
-                   {"role": "user", "content": f"Using the following context from the uploaded documents:\n{context_str}\n\nPlease generate a step-by-step reasoning before arriving at a comprehensive and accurate summary addressing the following question: '{message}'. Ensure your response is strictly based on the provided context, highlighting key metrics, trends, and significant details relevant to the query. Avoid any speculative or unverified information."}
-               ]
-
-               client = InferenceClient(model, token=huggingface_token)
-
-               response = ""
-               for i in range(num_calls):
-                   logging.info(f"API call {i+1}/{num_calls}")
-                   for message in client.chat_completion(
-                       messages=messages,
-                       max_tokens=20000,
-                       temperature=temperature,
-                       stream=True,
-                       top_p=0.8,
-                   ):
-                       if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                           chunk = message.choices[0].delta.content
-                           response += chunk
-                           yield response  # Yield partial response
-
-               logging.info("Finished generating response")
+
+           # Process other documents (PDF, Word)
+           if other_docs:
+               if model == "@cf/meta/llama-3.1-8b-instruct":
+                   for response in get_response_from_cloudflare(prompt="", context=other_context, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
+                       yield response
+               else:
+                   for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):
+                       yield response
 
        except Exception as e:
            logging.error(f"Error with {model}: {str(e)}")
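The reworked branch of respond now partitions the retrieved chunks by source-file extension and streams each group through its own generator. One detail to note: on the non-Cloudflare path the code passes selected_docs to get_response_from_pdf rather than the already-assembled other_context, so other_context is consumed only by the Cloudflare call and the length logging. A self-contained sketch of the partitioning idea; the Document class here is a hypothetical stand-in for the app's LangChain document objects:

# Hypothetical minimal stand-in for the app's LangChain documents, used only
# to demonstrate the extension-based routing introduced in this commit.
from dataclasses import dataclass, field

EXCEL_EXTENSIONS = ('.xlsx', '.xls')

@dataclass
class Document:
    page_content: str
    metadata: dict = field(default_factory=dict)

def partition_by_extension(docs):
    """Split retrieved chunks into Excel-sourced chunks and everything else."""
    excel = [d for d in docs if d.metadata["source"].lower().endswith(EXCEL_EXTENSIONS)]
    other = [d for d in docs if not d.metadata["source"].lower().endswith(EXCEL_EXTENSIONS)]
    return excel, other

docs = [
    Document("q1 revenue table", {"source": "financials.XLSX"}),
    Document("intro paragraph", {"source": "report.pdf"}),
]
excel_docs, other_docs = partition_by_extension(docs)
assert [d.metadata["source"] for d in excel_docs] == ["financials.XLSX"]
assert [d.metadata["source"] for d in other_docs] == ["report.pdf"]

Hoisting the extension tuple into a single constant keeps the two list comprehensions in sync and avoids the duplicated `.endswith(('.xlsx', '.xls'))` literals in the committed version; lowercasing the source first also catches upper-case extensions such as "financials.XLSX".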