Spaces:
Runtime error
Runtime error
Commit
·
7ff879d
1
Parent(s):
7f9e142
Update app.py
Browse files
app.py
CHANGED
@@ -38,11 +38,18 @@ def text_to_chunks(texts, file_names, word_length=150, start_page=1):
|
|
38 |
page_nums = []
|
39 |
chunks = []
|
40 |
|
41 |
-
|
42 |
|
43 |
for idx, words in enumerate(text_toks):
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
for i in range(0, len(words), word_length):
|
48 |
chunk = words[i:i+word_length]
|
@@ -51,11 +58,12 @@ def text_to_chunks(texts, file_names, word_length=150, start_page=1):
|
|
51 |
text_toks[idx+1] = chunk + text_toks[idx+1]
|
52 |
continue
|
53 |
chunk = ' '.join(chunk).strip()
|
54 |
-
chunk = f'[{file_names[current_file_idx]}, Page no. {
|
55 |
chunks.append(chunk)
|
56 |
return chunks
|
57 |
|
58 |
|
|
|
59 |
class SemanticSearch:
|
60 |
|
61 |
def __init__(self):
|
@@ -91,18 +99,19 @@ class SemanticSearch:
|
|
91 |
def load_recommender(paths, start_page=1):
|
92 |
global recommender
|
93 |
all_texts = []
|
94 |
-
file_names =
|
95 |
|
96 |
for path in paths:
|
97 |
texts = pdf_to_text(path, start_page=start_page)
|
98 |
all_texts.extend(texts)
|
99 |
-
file_names
|
100 |
|
101 |
chunks = text_to_chunks(all_texts, file_names, start_page=start_page)
|
102 |
recommender.fit(chunks)
|
103 |
return 'Corpus Loaded.'
|
104 |
|
105 |
|
|
|
106 |
|
107 |
def generate_text(openAI_key, prompt, engine="text-davinci-003"):
|
108 |
openai.api_key = openAI_key
|
|
|
38 |
page_nums = []
|
39 |
chunks = []
|
40 |
|
41 |
+
total_pages = len(texts)
|
42 |
|
43 |
for idx, words in enumerate(text_toks):
|
44 |
+
current_file_idx = 0
|
45 |
+
current_page = idx + start_page
|
46 |
+
|
47 |
+
for i, num_pages in enumerate(file_names.values()):
|
48 |
+
if current_page > num_pages:
|
49 |
+
current_page -= num_pages
|
50 |
+
current_file_idx += 1
|
51 |
+
else:
|
52 |
+
break
|
53 |
|
54 |
for i in range(0, len(words), word_length):
|
55 |
chunk = words[i:i+word_length]
|
|
|
58 |
text_toks[idx+1] = chunk + text_toks[idx+1]
|
59 |
continue
|
60 |
chunk = ' '.join(chunk).strip()
|
61 |
+
chunk = f'[{list(file_names.keys())[current_file_idx]}, Page no. {current_page}]' + ' ' + '"' + chunk + '"'
|
62 |
chunks.append(chunk)
|
63 |
return chunks
|
64 |
|
65 |
|
66 |
+
|
67 |
class SemanticSearch:
|
68 |
|
69 |
def __init__(self):
|
|
|
99 |
def load_recommender(paths, start_page=1):
|
100 |
global recommender
|
101 |
all_texts = []
|
102 |
+
file_names = {}
|
103 |
|
104 |
for path in paths:
|
105 |
texts = pdf_to_text(path, start_page=start_page)
|
106 |
all_texts.extend(texts)
|
107 |
+
file_names[os.path.basename(path)] = len(texts)
|
108 |
|
109 |
chunks = text_to_chunks(all_texts, file_names, start_page=start_page)
|
110 |
recommender.fit(chunks)
|
111 |
return 'Corpus Loaded.'
|
112 |
|
113 |
|
114 |
+
|
115 |
|
116 |
def generate_text(openAI_key, prompt, engine="text-davinci-003"):
|
117 |
openai.api_key = openAI_key
|