Spaces:
Sleeping
Sleeping
Commit
·
60aab5b
1
Parent(s):
de43d0e
Update app.py
Browse files
app.py
CHANGED
|
@@ -424,6 +424,69 @@ def remove_incomplete_sentence(text):
|
|
| 424 |
return text[:last_period_index + 1].strip()
|
| 425 |
return text
|
| 426 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
@app.get("/")
|
| 428 |
async def root():
|
| 429 |
return {"message": "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."}
|
|
|
|
| 424 |
return text[:last_period_index + 1].strip()
|
| 425 |
return text
|
| 426 |
|
| 427 |
+
|
| 428 |
+
language_code = 1
|
| 429 |
+
query_text = 'What are symptoms of heart attack ?'
|
| 430 |
+
query_embedding = embed_query_text(query_text) # Embed the query text
|
| 431 |
+
initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
|
| 432 |
+
document_ids = [doc_id for doc_id, _ in initial_results]
|
| 433 |
+
print(document_ids)
|
| 434 |
+
document_ids = [doc_id for doc_id, _ in initial_results]
|
| 435 |
+
document_texts = retrieve_document_texts(document_ids, folder_path)
|
| 436 |
+
|
| 437 |
+
# Rerank the results using the CrossEncoder
|
| 438 |
+
scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
|
| 439 |
+
scored_documents = list(zip(scores, document_ids, document_texts))
|
| 440 |
+
scored_documents.sort(key=lambda x: x[0], reverse=True)
|
| 441 |
+
print("Reranked results:")
|
| 442 |
+
for idx, (score, doc_id, doc) in enumerate(scored_documents):
|
| 443 |
+
print(f"Rank {idx + 1} (Score: {score:.4f}, Document ID: {doc_id})")  # one line per reranked document
|
| 444 |
+
relevant_portions = extract_relevant_portions(document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1)
|
| 445 |
+
|
| 446 |
+
for doc_id, portions in relevant_portions.items():
|
| 447 |
+
print(f"{doc_id}: {portions}")
|
| 448 |
+
|
| 449 |
+
flattened_relevant_portions = []
|
| 450 |
+
for doc_id, portions in relevant_portions.items():
|
| 451 |
+
flattened_relevant_portions.extend(portions)
|
| 452 |
+
|
| 453 |
+
# Remove duplicate portions
|
| 454 |
+
unique_selected_parts = remove_duplicates(flattened_relevant_portions)
|
| 455 |
+
|
| 456 |
+
# Combine the unique parts into a single string of context
|
| 457 |
+
combined_parts = " ".join(unique_selected_parts)
|
| 458 |
+
|
| 459 |
+
# Construct context as a list: first the query, then the unique selected portions
|
| 460 |
+
context = [query_text] + unique_selected_parts
|
| 461 |
+
|
| 462 |
+
# Print the context (query + relevant portions)
|
| 463 |
+
print(context)
|
| 464 |
+
entities = extract_entities(query_text)
|
| 465 |
+
passage = enhance_passage_with_entities(combined_parts, entities)
|
| 466 |
+
# Generate answer with the enhanced passage
|
| 467 |
+
prompt = create_prompt(query_text, passage)
|
| 468 |
+
answer, generation_time = generate_answer(prompt)
|
| 469 |
+
print(f"\nTime taken to generate the answer: {generation_time:.2f} seconds")
|
| 470 |
+
answer_part = answer.split("Answer:")[-1].strip()
|
| 471 |
+
cleaned_answer = remove_answer_prefix(answer_part)
|
| 472 |
+
final_answer = remove_incomplete_sentence(cleaned_answer)
|
| 473 |
+
|
| 474 |
+
if language_code == 0:
|
| 475 |
+
final_answer = translate_en_to_ar(final_answer)
|
| 476 |
+
|
| 477 |
+
if final_answer:
|
| 478 |
+
print("Answer:")
|
| 479 |
+
print(final_answer)
|
| 480 |
+
else:
|
| 481 |
+
print("Sorry, I can't help with that.")
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
|
| 489 |
+
|
| 490 |
@app.get("/")
|
| 491 |
async def root():
|
| 492 |
return {"message": "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."}
|