Shreyas094 committed on
Commit
42fe9e9
·
verified ·
1 Parent(s): a3c94f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -6
app.py CHANGED
@@ -353,12 +353,12 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
353
  except Exception as e:
354
  return f"An error occurred during summarization: {str(e)}"
355
 
356
- def get_response_from_gemini(query, context, file_type, num_calls=1, temperature=0.2):
357
  # Configure the Gemini API
358
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
359
 
360
  # Define the model
361
- model = genai.GenerativeModel(
362
  model_name="gemini-1.5-flash",
363
  generation_config={
364
  "temperature": temperature,
@@ -368,27 +368,55 @@ def get_response_from_gemini(query, context, file_type, num_calls=1, temperature
368
  },
369
  )
370
 
371
- # Create the system instruction based on file type
372
  if file_type == "excel":
 
373
  system_instruction = """You are a highly specialized data analyst with expertise in Excel spreadsheets.
374
  Your task is to analyze the provided Excel data and answer the user's query accurately and concisely.
375
  Focus on identifying key metrics, trends, and significant details relevant to the query.
376
  Do not make assumptions or include information not explicitly supported by the dataset."""
 
 
 
377
  elif file_type == "pdf":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  system_instruction = """You are a highly specialized document analyst with expertise in extracting information from PDF documents.
379
  Your task is to analyze the provided PDF content and answer the user's query accurately and comprehensively.
380
  Focus on key points, important details, and relevant information from the document.
381
  Ensure your response is strictly based on the provided context."""
 
 
 
382
  else:
383
  raise ValueError("Invalid file type. Use 'excel' or 'pdf'.")
384
 
385
- full_prompt = f"{system_instruction}\n\nContext:\n{context}\n\nUser query: {query}"
386
-
387
  full_response = ""
388
  for _ in range(num_calls):
389
  try:
390
  # Generate content with streaming enabled
391
- response = model.generate_content(full_prompt, stream=True)
392
  for chunk in response:
393
  if chunk.text:
394
  full_response += chunk.text
 
353
  except Exception as e:
354
  return f"An error occurred during summarization: {str(e)}"
355
 
356
+ def get_response_from_gemini(query, model, selected_docs, file_type, num_calls=1, temperature=0.2):
357
  # Configure the Gemini API
358
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
359
 
360
  # Define the model
361
+ gemini_model = genai.GenerativeModel(
362
  model_name="gemini-1.5-flash",
363
  generation_config={
364
  "temperature": temperature,
 
368
  },
369
  )
370
 
 
371
  if file_type == "excel":
372
+ # Excel functionality remains the same
373
  system_instruction = """You are a highly specialized data analyst with expertise in Excel spreadsheets.
374
  Your task is to analyze the provided Excel data and answer the user's query accurately and concisely.
375
  Focus on identifying key metrics, trends, and significant details relevant to the query.
376
  Do not make assumptions or include information not explicitly supported by the dataset."""
377
+
378
+ full_prompt = f"{system_instruction}\n\nContext:\n{selected_docs}\n\nUser query: {query}"
379
+
380
  elif file_type == "pdf":
381
+ # PDF functionality similar to get_response_from_pdf
382
+ embed = get_embeddings()
383
+ if os.path.exists("faiss_database"):
384
+ database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
385
+ else:
386
+ yield "No documents available. Please upload PDF documents to answer questions."
387
+ return
388
+
389
+ # Pre-filter the documents
390
+ filtered_docs = [doc for doc_id, doc in database.docstore._dict.items()
391
+ if isinstance(doc, Document) and doc.metadata.get("source") in selected_docs]
392
+
393
+ if not filtered_docs:
394
+ yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
395
+ return
396
+
397
+ # Create a new FAISS index with only the selected documents
398
+ filtered_db = FAISS.from_documents(filtered_docs, embed)
399
+
400
+ retriever = filtered_db.as_retriever(search_kwargs={"k": 10})
401
+ relevant_docs = retriever.get_relevant_documents(query)
402
+
403
+ context_str = "\n".join([doc.page_content for doc in relevant_docs])
404
+
405
  system_instruction = """You are a highly specialized document analyst with expertise in extracting information from PDF documents.
406
  Your task is to analyze the provided PDF content and answer the user's query accurately and comprehensively.
407
  Focus on key points, important details, and relevant information from the document.
408
  Ensure your response is strictly based on the provided context."""
409
+
410
+ full_prompt = f"{system_instruction}\n\nContext:\n{context_str}\n\nUser query: {query}\n\nPlease generate a step-by-step reasoning before arriving at a comprehensive and accurate summary addressing the question. Ensure your response is strictly based on the provided context, highlighting key metrics, trends, and significant details relevant to the query. Avoid any speculative or unverified information."
411
+
412
  else:
413
  raise ValueError("Invalid file type. Use 'excel' or 'pdf'.")
414
 
 
 
415
  full_response = ""
416
  for _ in range(num_calls):
417
  try:
418
  # Generate content with streaming enabled
419
+ response = gemini_model.generate_content(full_prompt, stream=True)
420
  for chunk in response:
421
  if chunk.text:
422
  full_response += chunk.text