Spaces:

ahmedkasem
/

quran-nlp

Sleeping

App Files Files Community

deveix committed on Apr 13, 2024

Commit

642181a

1 Parent(s): dc43c61

fix quran tafsir

Browse files

Files changed (3) hide show

app/dataset/quran_tafseer.txt +0 -0
app/dataset/quran_tafseer_formatted.txt +0 -0
app/main.py +32 -1

app/dataset/quran_tafseer.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

app/dataset/quran_tafseer_formatted.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

app/main.py CHANGED Viewed

@@ -40,6 +40,27 @@ app.add_middleware(
     allow_headers=["*"],
 )
 # Existing API endpoints
 @app.get("/")
 async def read_root():
@@ -75,8 +96,18 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
     try:
         # Perform the similarity search with the provided question
         matching_docs = vector_search.similarity_search(item.question, k=3)
-        return {"answers": [doc.page_content for doc in matching_docs]}
     except Exception as e:
         # If there's an error, return a 500 error with the error's details
         raise HTTPException(status_code=500, detail=str(e))

     allow_headers=["*"],
 )
def index_file(filepath):
    """Build a lookup from stripped line text to its 1-based line number.

    The file at ``filepath`` is read as UTF-8. Leading and trailing whitespace
    is removed from every line before it is used as a key. When the same
    stripped text appears on several lines, the number of the last such line
    is the one kept.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        # Numbering starts at 1 so the values read like editor line numbers.
        return {text.strip(): number for number, text in enumerate(handle, 1)}
def get_text_by_line_number(filepath, line_numbers):
    """Return selected lines of a UTF-8 text file, keyed by line number.

    Args:
        filepath: Path of the file to read.
        line_numbers: Iterable of 1-based line numbers to fetch. Duplicates
            are ignored, and any iterable (list, set, generator, ...) is
            accepted.

    Returns:
        A dict mapping each requested line number that exists in the file to
        that line's text with surrounding whitespace stripped. Entries appear
        in file order; numbers beyond the end of the file are simply absent.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    # A set gives O(1) membership tests and collapses duplicates, so the
    # early-exit count below is reachable even for input such as [3, 3].
    wanted = set(line_numbers)
    lines = {}
    with open(filepath, 'r', encoding='utf-8') as file:
        # The file is opened first so a missing file still raises, but an
        # empty request must not scan the whole file.
        if not wanted:
            return lines
        for line_number, line in enumerate(file, 1):
            if line_number in wanted:
                lines[line_number] = line.strip()
                if len(lines) == len(wanted):  # Every requested line found
                    break
    return lines
 # Existing API endpoints
 @app.get("/")
 async def read_root():
     try:
         # Perform the similarity search with the provided question
         matching_docs = vector_search.similarity_search(item.question, k=3)
+        clean_answers = [doc.page_content.replace("\n", " ").strip() for doc in matching_docs]
+        # Index 'dataset/quran_tafseer_formatted.txt' so each cleaned answer can be looked up by its text
+        answers_index = index_file('dataset/quran_tafseer_formatted.txt')
+        # Collect line numbers based on answers found
+        line_numbers = [answers_index[answer] for answer in clean_answers if answer in answers_index]
+        # Retrieve the lines at those line numbers from 'dataset/quran_tafseer.txt'
+        result_text = get_text_by_line_number('dataset/quran_tafseer.txt', line_numbers)
+        return {"result_text": result_text}
     except Exception as e:
         # If there's an error, return a 500 error with the error's details
         raise HTTPException(status_code=500, detail=str(e))