deveix committed on
Commit
886c1e1
·
1 Parent(s): 5b25c6e

fix search

Browse files
Files changed (1) hide show
  1. app/main.py +27 -12
app/main.py CHANGED
@@ -42,23 +42,38 @@ app.add_middleware(
42
 
43
 
44
  def index_file(filepath):
45
- """ Index each line in a file for quick search. Returns a dictionary with key as content and value as line number. """
 
46
  index = {}
47
  with open(filepath, 'r', encoding='utf-8') as file:
48
- for line_number, line in enumerate(file, 1): # Starting line numbers at 1 for human readability
49
- index[line.strip()] = line_number
 
 
 
 
 
 
 
 
50
  return index
51
 
52
- def get_text_by_line_number(filepath, line_numbers):
53
- """ Retrieve specific lines from a file based on line numbers. """
54
- lines = {}
 
55
  with open(filepath, 'r', encoding='utf-8') as file:
56
- for line_number, line in enumerate(file, 1):
57
- if line_number in line_numbers:
58
- lines[line_number] = line.strip()
59
- if len(lines) == len(line_numbers): # Stop reading once all required lines are read
 
 
 
 
 
60
  break
61
- return lines
62
 
63
 
64
  # Existing API endpoints
@@ -105,7 +120,7 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
105
  line_numbers = [answers_index[answer] for answer in clean_answers if answer in answers_index]
106
 
107
  # Assuming 'retrieve_file.txt' is where we retrieve lines based on line numbers
108
- result_text = get_text_by_line_number('app/quran_tafseer.txt', line_numbers)
109
 
110
  return {"result_text": result_text}
111
  except Exception as e:
 
42
 
43
 
44
  def index_file(filepath):
45
+ """ Index each block in a file separated by double newlines for quick search.
46
+ Returns a dictionary with key as content and value as block number. """
47
  index = {}
48
  with open(filepath, 'r', encoding='utf-8') as file:
49
+ content = file.read() # Read the whole file at once
50
+ blocks = content.split("\n\n") # Split the content by double newlines
51
+
52
+ for block_number, block in enumerate(blocks, 1): # Starting block numbers at 1 for human readability
53
+ # Replace single newlines within blocks with space and strip leading/trailing whitespace
54
+ formatted_block = ' '.join(block.split('\n')).strip()
55
+ index[formatted_block] = block_number
56
+ # if(block_number == 100):
57
+ # print(formatted_block) # Print the 5th block
58
+
59
  return index
60
 
61
+
62
def get_text_by_block_number(filepath, block_numbers):
    """Retrieve specific blocks of a text file by their 1-based numbers.

    Blocks are the pieces obtained by splitting the file content on double
    newlines. Inside each returned block, single newlines are replaced with
    spaces and surrounding whitespace is stripped.

    Args:
        filepath: Path of the UTF-8 text file to read.
        block_numbers: Iterable of 1-based block numbers to fetch.
            Duplicates and numbers beyond the last block are ignored.

    Returns:
        A list of the requested blocks' text in file order (not in the
        order the numbers were passed), each block at most once.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # A set gives O(1) membership per block and collapses duplicate requests.
    wanted = set(block_numbers)
    if not wanted:
        # Nothing requested: skip reading the file entirely.
        return []

    with open(filepath, 'r', encoding='utf-8') as file:
        content = file.read()  # Read the whole file at once

    return [
        block.replace('\n', ' ').strip()
        for block_number, block in enumerate(content.split("\n\n"), 1)
        if block_number in wanted
    ]
77
 
78
 
79
  # Existing API endpoints
 
120
  line_numbers = [answers_index[answer] for answer in clean_answers if answer in answers_index]
121
 
122
  # Retrieve the blocks of 'app/quran_tafseer.txt' at the block numbers matched above
123
+ result_text = get_text_by_block_number('app/quran_tafseer.txt', line_numbers)
124
 
125
  return {"result_text": result_text}
126
  except Exception as e: