Spaces:
Running
Running
deveix
committed on
Commit
·
642181a
1
Parent(s):
dc43c61
fix quran tafsir
Browse files- app/dataset/quran_tafseer.txt +0 -0
- app/dataset/quran_tafseer_formatted.txt +0 -0
- app/main.py +32 -1
app/dataset/quran_tafseer.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app/dataset/quran_tafseer_formatted.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app/main.py
CHANGED
@@ -40,6 +40,27 @@ app.add_middleware(
|
|
40 |
allow_headers=["*"],
|
41 |
)
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
# Existing API endpoints
|
44 |
@app.get("/")
|
45 |
async def read_root():
|
@@ -75,8 +96,18 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
|
|
75 |
try:
|
76 |
# Perform the similarity search with the provided question
|
77 |
matching_docs = vector_search.similarity_search(item.question, k=3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
-
return {"
|
80 |
except Exception as e:
|
81 |
# If there's an error, return a 500 error with the error's details
|
82 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
40 |
allow_headers=["*"],
|
41 |
)
|
42 |
|
43 |
+
|
44 |
+
def index_file(filepath):
    """Build a reverse lookup from stripped line text to its 1-based line number.

    The file is read as UTF-8. Each line has surrounding whitespace removed
    before being used as a key. When the same stripped text occurs on more
    than one line, the number of the last occurrence is kept.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        # Numbering starts at 1 so the values match what an editor would show.
        return {text.strip(): number for number, text in enumerate(handle, 1)}
|
51 |
+
|
52 |
+
def get_text_by_line_number(filepath, line_numbers):
    """Retrieve specific lines from a UTF-8 text file by 1-based line number.

    Args:
        filepath: Path of the file to read.
        line_numbers: Iterable of 1-based line numbers to fetch. Duplicates
            are ignored, and any iterable (list, set, generator) is accepted.

    Returns:
        A dict mapping each requested line number that exists in the file to
        that line's text with surrounding whitespace stripped. Entries appear
        in file order; numbers past the end of the file are simply absent.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Normalise to a set once: membership becomes O(1) per line, and duplicate
    # requests can no longer prevent the early exit below from triggering.
    wanted = set(line_numbers)
    lines = {}
    with open(filepath, 'r', encoding='utf-8') as file:
        for line_number, line in enumerate(file, 1):
            if line_number in wanted:
                lines[line_number] = line.strip()
            # Stop reading as soon as every distinct requested line is found.
            if len(lines) == len(wanted):
                break
    return lines
|
62 |
+
|
63 |
+
|
64 |
# Existing API endpoints
|
65 |
@app.get("/")
|
66 |
async def read_root():
|
|
|
96 |
try:
|
97 |
# Perform the similarity search with the provided question
|
98 |
matching_docs = vector_search.similarity_search(item.question, k=3)
|
99 |
+
clean_answers = [doc.page_content.replace("\n", " ").strip() for doc in matching_docs]
|
100 |
+
|
101 |
+
# Index 'dataset/quran_tafseer_formatted.txt' (line content -> line number) to locate the matched passages
|
102 |
+
answers_index = index_file('dataset/quran_tafseer_formatted.txt')
|
103 |
+
|
104 |
+
# Collect line numbers based on answers found
|
105 |
+
line_numbers = [answers_index[answer] for answer in clean_answers if answer in answers_index]
|
106 |
+
|
107 |
+
# Retrieve the lines with those line numbers from 'dataset/quran_tafseer.txt'
|
108 |
+
result_text = get_text_by_line_number('dataset/quran_tafseer.txt', line_numbers)
|
109 |
|
110 |
+
return {"result_text": result_text}
|
111 |
except Exception as e:
|
112 |
# If there's an error, return a 500 error with the error's details
|
113 |
raise HTTPException(status_code=500, detail=str(e))
|