Spaces:
Running
Running
deveix
committed on
Commit
·
886c1e1
1
Parent(s):
5b25c6e
fix search
Browse files- app/main.py +27 -12
app/main.py
CHANGED
@@ -42,23 +42,38 @@ app.add_middleware(
|
|
42 |
|
43 |
|
44 |
def index_file(filepath):
|
45 |
-
""" Index each
|
|
|
46 |
index = {}
|
47 |
with open(filepath, 'r', encoding='utf-8') as file:
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
return index
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
55 |
with open(filepath, 'r', encoding='utf-8') as file:
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
60 |
break
|
61 |
-
return
|
62 |
|
63 |
|
64 |
# Existing API endpoints
|
@@ -105,7 +120,7 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
|
|
105 |
line_numbers = [answers_index[answer] for answer in clean_answers if answer in answers_index]
|
106 |
|
107 |
# Assuming 'retrieve_file.txt' is where we retrieve lines based on line numbers
|
108 |
-
result_text =
|
109 |
|
110 |
return {"result_text": result_text}
|
111 |
except Exception as e:
|
|
|
42 |
|
43 |
|
44 |
def index_file(filepath) -> dict:
    """Index the blank-line-separated blocks of a text file for quick lookup.

    The file is read whole and split on double newlines. Each block is
    normalized by replacing its internal newlines with single spaces and
    stripping leading/trailing whitespace.

    Args:
        filepath: Path of the UTF-8 text file to index.

    Returns:
        A dictionary mapping each normalized block's text to its 1-based
        block number. When the same text occurs in several blocks, the
        number of the last occurrence is kept.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        blocks = file.read().split("\n\n")

    index = {}
    # Enumerate *every* block (including empty ones) so the numbers keep
    # matching the positions produced by splitting the file on "\n\n".
    for block_number, block in enumerate(blocks, 1):
        formatted_block = ' '.join(block.split('\n')).strip()
        # Trailing newlines or runs of blank lines yield empty blocks; do not
        # index them, otherwise an empty query string would match a block.
        if formatted_block:
            index[formatted_block] = block_number
    return index
|
60 |
|
61 |
+
|
62 |
+
def get_text_by_block_number(filepath, block_numbers) -> list:
    """Retrieve specific blank-line-separated blocks from a text file.

    The file is read whole and split on double newlines; blocks are numbered
    from 1 in file order. Each selected block has its internal newlines
    replaced by single spaces and its leading/trailing whitespace stripped.

    Args:
        filepath: Path of the UTF-8 text file to read.
        block_numbers: Iterable of 1-based block numbers to fetch. Duplicates
            and numbers beyond the end of the file are ignored.

    Returns:
        A list of the normalized text of the requested blocks, in the order
        they appear in the file (not the order they were requested).
    """
    # A set gives O(1) membership per block and lets callers pass any
    # iterable (list, tuple, set, generator), not only sized sequences.
    wanted = set(block_numbers)

    with open(filepath, 'r', encoding='utf-8') as file:
        blocks = file.read().split("\n\n")

    return [
        ' '.join(block.split('\n')).strip()
        for block_number, block in enumerate(blocks, 1)
        if block_number in wanted
    ]
|
77 |
|
78 |
|
79 |
# Existing API endpoints
|
|
|
120 |
line_numbers = [answers_index[answer] for answer in clean_answers if answer in answers_index]
|
121 |
|
122 |
# Assuming 'retrieve_file.txt' is where we retrieve lines based on line numbers
|
123 |
+
result_text = get_text_by_block_number('app/quran_tafseer.txt', line_numbers)
|
124 |
|
125 |
return {"result_text": result_text}
|
126 |
except Exception as e:
|