bhaskartripathi committed on
Commit
7ff879d
·
1 Parent(s): 7f9e142

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -38,11 +38,18 @@ def text_to_chunks(texts, file_names, word_length=150, start_page=1):
38
  page_nums = []
39
  chunks = []
40
 
41
- current_file_idx = 0
42
 
43
  for idx, words in enumerate(text_toks):
44
- if idx > 0 and idx % len(file_names) == 0:
45
- current_file_idx += 1
 
 
 
 
 
 
 
46
 
47
  for i in range(0, len(words), word_length):
48
  chunk = words[i:i+word_length]
@@ -51,11 +58,12 @@ def text_to_chunks(texts, file_names, word_length=150, start_page=1):
51
  text_toks[idx+1] = chunk + text_toks[idx+1]
52
  continue
53
  chunk = ' '.join(chunk).strip()
54
- chunk = f'[{file_names[current_file_idx]}, Page no. {idx+start_page}]' + ' ' + '"' + chunk + '"'
55
  chunks.append(chunk)
56
  return chunks
57
 
58
 
 
59
  class SemanticSearch:
60
 
61
  def __init__(self):
@@ -91,18 +99,19 @@ class SemanticSearch:
91
  def load_recommender(paths, start_page=1):
92
  global recommender
93
  all_texts = []
94
- file_names = []
95
 
96
  for path in paths:
97
  texts = pdf_to_text(path, start_page=start_page)
98
  all_texts.extend(texts)
99
- file_names.append(os.path.basename(path))
100
 
101
  chunks = text_to_chunks(all_texts, file_names, start_page=start_page)
102
  recommender.fit(chunks)
103
  return 'Corpus Loaded.'
104
 
105
 
 
106
 
107
  def generate_text(openAI_key, prompt, engine="text-davinci-003"):
108
  openai.api_key = openAI_key
 
38
  page_nums = []
39
  chunks = []
40
 
41
+ total_pages = len(texts)
42
 
43
  for idx, words in enumerate(text_toks):
44
+ current_file_idx = 0
45
+ current_page = idx + start_page
46
+
47
+ for i, num_pages in enumerate(file_names.values()):
48
+ if current_page > num_pages:
49
+ current_page -= num_pages
50
+ current_file_idx += 1
51
+ else:
52
+ break
53
 
54
  for i in range(0, len(words), word_length):
55
  chunk = words[i:i+word_length]
 
58
  text_toks[idx+1] = chunk + text_toks[idx+1]
59
  continue
60
  chunk = ' '.join(chunk).strip()
61
+ chunk = f'[{list(file_names.keys())[current_file_idx]}, Page no. {current_page}]' + ' ' + '"' + chunk + '"'
62
  chunks.append(chunk)
63
  return chunks
64
 
65
 
66
+
67
  class SemanticSearch:
68
 
69
  def __init__(self):
 
99
  def load_recommender(paths, start_page=1):
100
  global recommender
101
  all_texts = []
102
+ file_names = {}
103
 
104
  for path in paths:
105
  texts = pdf_to_text(path, start_page=start_page)
106
  all_texts.extend(texts)
107
+ file_names[os.path.basename(path)] = len(texts)
108
 
109
  chunks = text_to_chunks(all_texts, file_names, start_page=start_page)
110
  recommender.fit(chunks)
111
  return 'Corpus Loaded.'
112
 
113
 
114
+
115
 
116
  def generate_text(openAI_key, prompt, engine="text-davinci-003"):
117
  openai.api_key = openAI_key