Spaces:
Runtime error
Runtime error
Commit
·
7ff879d
1
Parent(s):
7f9e142
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,11 +38,18 @@ def text_to_chunks(texts, file_names, word_length=150, start_page=1):
|
|
| 38 |
page_nums = []
|
| 39 |
chunks = []
|
| 40 |
|
| 41 |
-
|
| 42 |
|
| 43 |
for idx, words in enumerate(text_toks):
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
for i in range(0, len(words), word_length):
|
| 48 |
chunk = words[i:i+word_length]
|
|
@@ -51,11 +58,12 @@ def text_to_chunks(texts, file_names, word_length=150, start_page=1):
|
|
| 51 |
text_toks[idx+1] = chunk + text_toks[idx+1]
|
| 52 |
continue
|
| 53 |
chunk = ' '.join(chunk).strip()
|
| 54 |
-
chunk = f'[{file_names[current_file_idx]}, Page no. {
|
| 55 |
chunks.append(chunk)
|
| 56 |
return chunks
|
| 57 |
|
| 58 |
|
|
|
|
| 59 |
class SemanticSearch:
|
| 60 |
|
| 61 |
def __init__(self):
|
|
@@ -91,18 +99,19 @@ class SemanticSearch:
|
|
| 91 |
def load_recommender(paths, start_page=1):
|
| 92 |
global recommender
|
| 93 |
all_texts = []
|
| 94 |
-
file_names =
|
| 95 |
|
| 96 |
for path in paths:
|
| 97 |
texts = pdf_to_text(path, start_page=start_page)
|
| 98 |
all_texts.extend(texts)
|
| 99 |
-
file_names
|
| 100 |
|
| 101 |
chunks = text_to_chunks(all_texts, file_names, start_page=start_page)
|
| 102 |
recommender.fit(chunks)
|
| 103 |
return 'Corpus Loaded.'
|
| 104 |
|
| 105 |
|
|
|
|
| 106 |
|
| 107 |
def generate_text(openAI_key, prompt, engine="text-davinci-003"):
|
| 108 |
openai.api_key = openAI_key
|
|
|
|
| 38 |
page_nums = []
|
| 39 |
chunks = []
|
| 40 |
|
| 41 |
+
total_pages = len(texts)
|
| 42 |
|
| 43 |
for idx, words in enumerate(text_toks):
|
| 44 |
+
current_file_idx = 0
|
| 45 |
+
current_page = idx + start_page
|
| 46 |
+
|
| 47 |
+
for i, num_pages in enumerate(file_names.values()):
|
| 48 |
+
if current_page > num_pages:
|
| 49 |
+
current_page -= num_pages
|
| 50 |
+
current_file_idx += 1
|
| 51 |
+
else:
|
| 52 |
+
break
|
| 53 |
|
| 54 |
for i in range(0, len(words), word_length):
|
| 55 |
chunk = words[i:i+word_length]
|
|
|
|
| 58 |
text_toks[idx+1] = chunk + text_toks[idx+1]
|
| 59 |
continue
|
| 60 |
chunk = ' '.join(chunk).strip()
|
| 61 |
+
chunk = f'[{list(file_names.keys())[current_file_idx]}, Page no. {current_page}]' + ' ' + '"' + chunk + '"'
|
| 62 |
chunks.append(chunk)
|
| 63 |
return chunks
|
| 64 |
|
| 65 |
|
| 66 |
+
|
| 67 |
class SemanticSearch:
|
| 68 |
|
| 69 |
def __init__(self):
|
|
|
|
| 99 |
def load_recommender(paths, start_page=1):
    """Build the retrieval corpus from one or more PDF files.

    Extracts the text of every PDF in *paths*, remembers how many pages
    each file contributed, chunks the combined text, and fits the global
    recommender on those chunks.

    Args:
        paths: Iterable of PDF file paths to ingest.
        start_page: 1-based page number at which extraction starts for
            each file (forwarded to both pdf_to_text and text_to_chunks).

    Returns:
        The status string 'Corpus Loaded.' once fitting is done.
    """
    global recommender

    corpus = []
    # Maps file basename -> number of extracted pages; text_to_chunks
    # uses this to attribute each chunk to the right file and page.
    # NOTE(review): two paths sharing a basename would overwrite each
    # other's entry here — confirm callers never pass duplicate names.
    file_names = {}

    for pdf_path in paths:
        pages = pdf_to_text(pdf_path, start_page=start_page)
        corpus.extend(pages)
        file_names[os.path.basename(pdf_path)] = len(pages)

    recommender.fit(text_to_chunks(corpus, file_names, start_page=start_page))
    return 'Corpus Loaded.'
|
| 112 |
|
| 113 |
|
| 114 |
+
|
| 115 |
|
| 116 |
def generate_text(openAI_key, prompt, engine="text-davinci-003"):
|
| 117 |
openai.api_key = openAI_key
|