thechaiexperiment committed on
Commit
60aab5b
·
1 Parent(s): de43d0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py CHANGED
@@ -424,6 +424,69 @@ def remove_incomplete_sentence(text):
424
  return text[:last_period_index + 1].strip()
425
  return text
426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  @app.get("/")
428
  async def root():
429
  return {"message": "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."}
 
424
  return text[:last_period_index + 1].strip()
425
  return text
426
 
427
+
428
+ language_code = 1
429
+ query_text = 'What are symptoms of heart attack ?'
430
+ query_embedding = embed_query_text(query_text) # Embed the query text
431
+ initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
432
+ document_ids = [doc_id for doc_id, _ in initial_results]
433
+ print(document_ids)
434
+ document_ids = [doc_id for doc_id, _ in initial_results]
435
+ document_texts = retrieve_document_texts(document_ids, folder_path)
436
+
437
+ # Rerank the results using the CrossEncoder
438
+ scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
439
+ scored_documents = list(zip(scores, document_ids, document_texts))
440
+ scored_documents.sort(key=lambda x: x[0], reverse=True)
441
+ print("Reranked results:")
442
+ for idx, (score, doc_id, doc) in enumerate(scored_documents):
443
+ print(f"Rank {idx + 1} (Score: {score:.4f}, Document ID: {doc_id}")
444
+ relevant_portions = extract_relevant_portions(document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1)
445
+
446
+ for doc_id, portions in relevant_portions.items():
447
+ print(f"{doc_id}: {portions}")
448
+
449
+ flattened_relevant_portions = []
450
+ for doc_id, portions in relevant_portions.items():
451
+ flattened_relevant_portions.extend(portions)
452
+
453
+ # Remove duplicate portions
454
+ unique_selected_parts = remove_duplicates(flattened_relevant_portions)
455
+
456
+ # Combine the unique parts into a single string of context
457
+ combined_parts = " ".join(unique_selected_parts)
458
+
459
+ # Construct context as a list: first the query, then the unique selected portions
460
+ context = [query_text] + unique_selected_parts
461
+
462
+ # Print the context (query + relevant portions)
463
+ print(context)
464
+ entities = extract_entities(query_text)
465
+ passage = enhance_passage_with_entities(combined_parts, entities)
466
+ # Generate answer with the enhanced passage
467
+ prompt = create_prompt(query_text, passage)
468
+ answer, generation_time = generate_answer(prompt)
469
+ print(f"\nTime taken to generate the answer: {generation_time:.2f} seconds")
470
+ answer_part = answer.split("Answer:")[-1].strip()
471
+ cleaned_answer = remove_answer_prefix(answer_part)
472
+ final_answer = remove_incomplete_sentence(cleaned_answer)
473
+
474
+ if language_code == 0:
475
+ final_answer = translate_en_to_ar(final_answer)
476
+
477
+ if final_answer:
478
+ print("Answer:")
479
+ print(final_answer)
480
+ else:
481
+ print("Sorry, I can't help with that.")
482
+
483
+
484
+
485
+
486
+
487
+
488
+
489
+
490
  @app.get("/")
491
  async def root():
492
  return {"message": "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."}