deveix committed on
Commit
642181a
·
1 Parent(s): dc43c61

fix quran tafsir

Browse files
app/dataset/quran_tafseer.txt ADDED
The diff for this file is too large to render. See raw diff
 
app/dataset/quran_tafseer_formatted.txt ADDED
The diff for this file is too large to render. See raw diff
 
app/main.py CHANGED
@@ -40,6 +40,27 @@ app.add_middleware(
40
  allow_headers=["*"],
41
  )
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # Existing API endpoints
44
  @app.get("/")
45
  async def read_root():
@@ -75,8 +96,18 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
75
  try:
76
  # Perform the similarity search with the provided question
77
  matching_docs = vector_search.similarity_search(item.question, k=3)
 
 
 
 
 
 
 
 
 
 
78
 
79
- return {"answers": [doc.page_content for doc in matching_docs]}
80
  except Exception as e:
81
  # If there's an error, return a 500 error with the error's details
82
  raise HTTPException(status_code=500, detail=str(e))
 
40
  allow_headers=["*"],
41
  )
42
 
43
+
44
def index_file(filepath):
    """Map every stripped line of a text file to its 1-based line number.

    The file is read as UTF-8. Leading and trailing whitespace (including
    the newline) is removed from each line before it is used as a key.
    When the same stripped text appears on several lines, the number of
    the last such line is the one that is kept.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        # Count from 1 so the numbers match what a text editor displays.
        return {text.strip(): number for number, text in enumerate(handle, 1)}
51
+
52
def get_text_by_line_number(filepath, line_numbers):
    """Retrieve specific lines from a file based on 1-based line numbers.

    Args:
        filepath: Path of the UTF-8 text file to read.
        line_numbers: Iterable of 1-based line numbers to fetch. It may be
            any iterable (list, set, generator); repeated numbers are
            collapsed.

    Returns:
        A dict mapping each requested line number that exists in the file
        to that line's text with surrounding whitespace stripped. Entries
        are inserted in file order; numbers that do not occur in the file
        are simply absent from the result.
    """
    # A set gives O(1) membership tests and removes duplicates, so the
    # early-exit counter below still reaches zero when a number repeats.
    wanted = set(line_numbers)
    remaining = len(wanted)
    lines = {}
    with open(filepath, 'r', encoding='utf-8') as file:
        for line_number, line in enumerate(file, 1):
            if remaining == 0:  # Everything requested has been collected
                break
            if line_number in wanted:
                lines[line_number] = line.strip()
                remaining -= 1
    return lines
62
+
63
+
64
  # Existing API endpoints
65
  @app.get("/")
66
  async def read_root():
 
96
  try:
97
  # Perform the similarity search with the provided question
98
  matching_docs = vector_search.similarity_search(item.question, k=3)
99
+ clean_answers = [doc.page_content.replace("\n", " ").strip() for doc in matching_docs]
100
+
101
+ # Assuming 'search_file.txt' is where we want to search answers
102
+ answers_index = index_file('dataset/quran_tafseer_formatted.txt')
103
+
104
+ # Collect line numbers based on answers found
105
+ line_numbers = [answers_index[answer] for answer in clean_answers if answer in answers_index]
106
+
107
+ # Assuming 'retrieve_file.txt' is where we retrieve lines based on line numbers
108
+ result_text = get_text_by_line_number('dataset/quran_tafseer.txt', line_numbers)
109
 
110
+ return {"result_text": result_text}
111
  except Exception as e:
112
  # If there's an error, return a 500 error with the error's details
113
  raise HTTPException(status_code=500, detail=str(e))