Spaces:
Runtime error
Runtime error
deveix
committed on
Commit
·
886c1e1
1
Parent(s):
5b25c6e
fix search
Browse files- app/main.py +27 -12
app/main.py
CHANGED
|
@@ -42,23 +42,38 @@ app.add_middleware(
|
|
| 42 |
|
| 43 |
|
| 44 |
def index_file(filepath):
|
| 45 |
-
""" Index each
|
|
|
|
| 46 |
index = {}
|
| 47 |
with open(filepath, 'r', encoding='utf-8') as file:
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
return index
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
| 55 |
with open(filepath, 'r', encoding='utf-8') as file:
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
break
|
| 61 |
-
return
|
| 62 |
|
| 63 |
|
| 64 |
# Existing API endpoints
|
|
@@ -105,7 +120,7 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
|
|
| 105 |
line_numbers = [answers_index[answer] for answer in clean_answers if answer in answers_index]
|
| 106 |
|
| 107 |
# Assuming 'retrieve_file.txt' is where we retrieve lines based on line numbers
|
| 108 |
-
result_text =
|
| 109 |
|
| 110 |
return {"result_text": result_text}
|
| 111 |
except Exception as e:
|
|
|
|
| 42 |
|
| 43 |
|
| 44 |
def index_file(filepath):
    """Index the blank-line-separated blocks of a text file for exact lookup.

    The file content is split on double newlines. Inside each block, single
    newlines are replaced by spaces and surrounding whitespace is stripped.

    Args:
        filepath: Path of the UTF-8 encoded text file to index.

    Returns:
        A dict mapping each normalised block text to its 1-based block
        number. Empty blocks (empty file, trailing blank lines, long runs of
        newlines) still consume a block number, so numbering stays aligned
        with the raw double-newline split, but they are not added to the
        index. If the same text occurs in several blocks, the last
        occurrence wins.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        # The whole file is read at once; blocks are delimited by blank lines.
        blocks = file.read().split("\n\n")

    index = {}
    # Block numbers start at 1 for human readability.
    for block_number, block in enumerate(blocks, 1):
        formatted_block = block.replace('\n', ' ').strip()
        # An empty key would make an empty query match a meaningless block.
        if formatted_block:
            index[formatted_block] = block_number
    return index
|
| 60 |
|
| 61 |
+
|
| 62 |
+
def get_text_by_block_number(filepath, block_numbers):
    """Return the text of selected blocks from a blank-line-separated file.

    The file content is split on double newlines and blocks are numbered
    from 1. Inside each returned block, single newlines are replaced by
    spaces and surrounding whitespace is stripped.

    Args:
        filepath: Path of the UTF-8 encoded text file to read.
        block_numbers: Iterable of 1-based block numbers to retrieve.
            Duplicates are ignored; numbers beyond the last block are
            silently skipped.

    Returns:
        A list of the requested blocks' text in file order (not in the
        order the numbers were requested). Returns an empty list without
        opening the file when no block numbers are given.
    """
    # A set gives O(1) membership tests and collapses duplicate requests, so
    # the early-exit check below compares against the distinct count.
    wanted = set(block_numbers)
    if not wanted:
        return []

    with open(filepath, 'r', encoding='utf-8') as file:
        # The whole file is read at once; blocks are delimited by blank lines.
        blocks = file.read().split("\n\n")

    blocks_text = []
    for block_number, block in enumerate(blocks, 1):
        if block_number not in wanted:
            continue
        blocks_text.append(block.replace('\n', ' ').strip())
        # Stop scanning once every distinct requested block has been found.
        if len(blocks_text) == len(wanted):
            break
    return blocks_text
|
| 77 |
|
| 78 |
|
| 79 |
# Existing API endpoints
|
|
|
|
| 120 |
line_numbers = [answers_index[answer] for answer in clean_answers if answer in answers_index]
|
| 121 |
|
| 122 |
# Assuming 'retrieve_file.txt' is where we retrieve lines based on line numbers
|
| 123 |
+
result_text = get_text_by_block_number('app/quran_tafseer.txt', line_numbers)
|
| 124 |
|
| 125 |
return {"result_text": result_text}
|
| 126 |
except Exception as e:
|