Spaces:

thechaiexperiment
/

TeaRAG

Sleeping

App Files Files Community

thechaiexperiment committed on Jan 17

Commit

60aab5b

1 Parent(s): de43d0e

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -0

app.py CHANGED Viewed

@@ -424,6 +424,69 @@ def remove_incomplete_sentence(text):
             return text[:last_period_index + 1].strip()
     return text
 @app.get("/")
 async def root():
     """Handle GET "/" by returning a welcome message that names the /health and /api/query endpoints."""
     welcome_text = "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."
     return {"message": welcome_text}

             return text[:last_period_index + 1].strip()
     return text
+# Ad-hoc end-to-end run of the pipeline for one hard-coded query:
+# embed -> retrieve -> rerank -> extract portions -> build prompt -> generate -> clean up.
+# NOTE(review): these statements sit at module level, so they execute every time
+# the module is imported or the app starts. Consider moving them into a function
+# or behind a startup hook once the smoke test is no longer needed.
+# language_code == 0 triggers English-to-Arabic translation of the final answer
+# (see the check near the end); any other value leaves the answer untranslated.
+language_code = 1
+query_text = 'What are symptoms of heart attack ?'
+query_embedding = embed_query_text(query_text)  # Embed the query text
+initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
+# Each initial result is unpacked as a (doc_id, <ignored>) pair; only the IDs are kept.
+document_ids = [doc_id for doc_id, _ in initial_results]
+print(document_ids)
+document_texts = retrieve_document_texts(document_ids, folder_path)
+# Rerank the results using the CrossEncoder
+scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
+scored_documents = list(zip(scores, document_ids, document_texts))
+# Highest score first.
+scored_documents.sort(key=lambda x: x[0], reverse=True)
+print("Reranked results:")
+for idx, (score, doc_id, doc) in enumerate(scored_documents):
+    print(f"Rank {idx + 1} (Score: {score:.4f}, Document ID: {doc_id})")
+# NOTE(review): extraction below receives document_texts in retrieval order, not
+# the reranked order held in scored_documents. Confirm whether the reranked
+# texts were meant to be passed here instead.
+relevant_portions = extract_relevant_portions(document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1)
+# Print each document's portions and collect them into one flat list in a single pass.
+flattened_relevant_portions = []
+for doc_id, portions in relevant_portions.items():
+    print(f"{doc_id}: {portions}")
+    flattened_relevant_portions.extend(portions)
+# Remove duplicate portions
+unique_selected_parts = remove_duplicates(flattened_relevant_portions)
+# Combine the unique parts into a single string of context
+combined_parts = " ".join(unique_selected_parts)
+# Construct context as a list: first the query, then the unique selected portions
+context = [query_text] + unique_selected_parts
+# Print the context (query + relevant portions)
+print(context)
+entities = extract_entities(query_text)
+passage = enhance_passage_with_entities(combined_parts, entities)
+# Generate answer with the enhanced passage
+prompt = create_prompt(query_text, passage)
+answer, generation_time = generate_answer(prompt)
+print(f"\nTime taken to generate the answer: {generation_time:.2f} seconds")
+# Keep only the text after the last "Answer:" marker in the generated output.
+answer_part = answer.split("Answer:")[-1].strip()
+cleaned_answer = remove_answer_prefix(answer_part)
+final_answer = remove_incomplete_sentence(cleaned_answer)
+if language_code == 0:
+    final_answer = translate_en_to_ar(final_answer)
+# An empty answer after cleanup falls through to the apology message.
+if final_answer:
+    print("Answer:")
+    print(final_answer)
+else:
+    print("Sorry, I can't help with that.")
 @app.get("/")
 async def root():
     """Handle GET "/" by returning a welcome message that names the /health and /api/query endpoints."""
     welcome_text = "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."
     return {"message": welcome_text}