Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -353,12 +353,12 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
|
|
353 |
except Exception as e:
|
354 |
return f"An error occurred during summarization: {str(e)}"
|
355 |
|
356 |
-
def get_response_from_gemini(query,
|
357 |
# Configure the Gemini API
|
358 |
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
|
359 |
|
360 |
# Define the model
|
361 |
-
|
362 |
model_name="gemini-1.5-flash",
|
363 |
generation_config={
|
364 |
"temperature": temperature,
|
@@ -368,27 +368,55 @@ def get_response_from_gemini(query, context, file_type, num_calls=1, temperature
|
|
368 |
},
|
369 |
)
|
370 |
|
371 |
-
# Create the system instruction based on file type
|
372 |
if file_type == "excel":
|
|
|
373 |
system_instruction = """You are a highly specialized data analyst with expertise in Excel spreadsheets.
|
374 |
Your task is to analyze the provided Excel data and answer the user's query accurately and concisely.
|
375 |
Focus on identifying key metrics, trends, and significant details relevant to the query.
|
376 |
Do not make assumptions or include information not explicitly supported by the dataset."""
|
|
|
|
|
|
|
377 |
elif file_type == "pdf":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
378 |
system_instruction = """You are a highly specialized document analyst with expertise in extracting information from PDF documents.
|
379 |
Your task is to analyze the provided PDF content and answer the user's query accurately and comprehensively.
|
380 |
Focus on key points, important details, and relevant information from the document.
|
381 |
Ensure your response is strictly based on the provided context."""
|
|
|
|
|
|
|
382 |
else:
|
383 |
raise ValueError("Invalid file type. Use 'excel' or 'pdf'.")
|
384 |
|
385 |
-
full_prompt = f"{system_instruction}\n\nContext:\n{context}\n\nUser query: {query}"
|
386 |
-
|
387 |
full_response = ""
|
388 |
for _ in range(num_calls):
|
389 |
try:
|
390 |
# Generate content with streaming enabled
|
391 |
-
response =
|
392 |
for chunk in response:
|
393 |
if chunk.text:
|
394 |
full_response += chunk.text
|
|
|
353 |
except Exception as e:
|
354 |
return f"An error occurred during summarization: {str(e)}"
|
355 |
|
356 |
+
def get_response_from_gemini(query, model, selected_docs, file_type, num_calls=1, temperature=0.2):
|
357 |
# Configure the Gemini API
|
358 |
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
|
359 |
|
360 |
# Define the model
|
361 |
+
gemini_model = genai.GenerativeModel(
|
362 |
model_name="gemini-1.5-flash",
|
363 |
generation_config={
|
364 |
"temperature": temperature,
|
|
|
368 |
},
|
369 |
)
|
370 |
|
|
|
371 |
if file_type == "excel":
|
372 |
+
# Excel functionality remains the same
|
373 |
system_instruction = """You are a highly specialized data analyst with expertise in Excel spreadsheets.
|
374 |
Your task is to analyze the provided Excel data and answer the user's query accurately and concisely.
|
375 |
Focus on identifying key metrics, trends, and significant details relevant to the query.
|
376 |
Do not make assumptions or include information not explicitly supported by the dataset."""
|
377 |
+
|
378 |
+
full_prompt = f"{system_instruction}\n\nContext:\n{selected_docs}\n\nUser query: {query}"
|
379 |
+
|
380 |
elif file_type == "pdf":
|
381 |
+
# PDF functionality similar to get_response_from_pdf
|
382 |
+
embed = get_embeddings()
|
383 |
+
if os.path.exists("faiss_database"):
|
384 |
+
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
385 |
+
else:
|
386 |
+
yield "No documents available. Please upload PDF documents to answer questions."
|
387 |
+
return
|
388 |
+
|
389 |
+
# Pre-filter the documents
|
390 |
+
filtered_docs = [doc for doc_id, doc in database.docstore._dict.items()
|
391 |
+
if isinstance(doc, Document) and doc.metadata.get("source") in selected_docs]
|
392 |
+
|
393 |
+
if not filtered_docs:
|
394 |
+
yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
|
395 |
+
return
|
396 |
+
|
397 |
+
# Create a new FAISS index with only the selected documents
|
398 |
+
filtered_db = FAISS.from_documents(filtered_docs, embed)
|
399 |
+
|
400 |
+
retriever = filtered_db.as_retriever(search_kwargs={"k": 10})
|
401 |
+
relevant_docs = retriever.get_relevant_documents(query)
|
402 |
+
|
403 |
+
context_str = "\n".join([doc.page_content for doc in relevant_docs])
|
404 |
+
|
405 |
system_instruction = """You are a highly specialized document analyst with expertise in extracting information from PDF documents.
|
406 |
Your task is to analyze the provided PDF content and answer the user's query accurately and comprehensively.
|
407 |
Focus on key points, important details, and relevant information from the document.
|
408 |
Ensure your response is strictly based on the provided context."""
|
409 |
+
|
410 |
+
full_prompt = f"{system_instruction}\n\nContext:\n{context_str}\n\nUser query: {query}\n\nPlease generate a step-by-step reasoning before arriving at a comprehensive and accurate summary addressing the question. Ensure your response is strictly based on the provided context, highlighting key metrics, trends, and significant details relevant to the query. Avoid any speculative or unverified information."
|
411 |
+
|
412 |
else:
|
413 |
raise ValueError("Invalid file type. Use 'excel' or 'pdf'.")
|
414 |
|
|
|
|
|
415 |
full_response = ""
|
416 |
for _ in range(num_calls):
|
417 |
try:
|
418 |
# Generate content with streaming enabled
|
419 |
+
response = gemini_model.generate_content(full_prompt, stream=True)
|
420 |
for chunk in response:
|
421 |
if chunk.text:
|
422 |
full_response += chunk.text
|