Spaces:
Sleeping
Sleeping
bartman081523
committed on
Commit
·
d8cc20c
1
Parent(s):
14f19e7
implement result word count
Browse files- app.py +61 -27
- gematria.db +2 -2
- gematria.db-journal +0 -3
app.py
CHANGED
@@ -21,6 +21,9 @@ book_names = {}
|
|
21 |
# Pre-load Gematria values for common phrases to speed up search
|
22 |
gematria_cache = {}
|
23 |
|
|
|
|
|
|
|
24 |
def initialize_database():
|
25 |
"""Initializes the SQLite database."""
|
26 |
global conn
|
@@ -45,6 +48,12 @@ def initialize_database():
|
|
45 |
max_phrase_length INTEGER
|
46 |
)
|
47 |
''')
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
def initialize_translator():
|
50 |
"""Initializes the Google Translator."""
|
@@ -103,6 +112,12 @@ def populate_database(start_book, end_book, max_phrase_length=1):
|
|
103 |
gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
|
104 |
yield gematria_sum, phrase_candidate, title, chapter_id + 1, verse_id + 1
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
def insert_phrases_to_db(phrases):
|
107 |
"""Inserts a list of phrases into the database efficiently."""
|
108 |
global conn
|
@@ -119,23 +134,26 @@ def insert_phrases_to_db(phrases):
|
|
119 |
|
120 |
def get_translation(phrase):
|
121 |
"""Retrieves or generates the English translation of a Hebrew phrase."""
|
122 |
-
global translator, conn
|
123 |
-
|
124 |
-
|
125 |
-
SELECT translation FROM results
|
126 |
-
WHERE words = ?
|
127 |
-
''', (phrase,))
|
128 |
-
result = cursor.fetchone()
|
129 |
-
if result and result[0]:
|
130 |
-
return result[0]
|
131 |
else:
|
132 |
-
|
133 |
cursor.execute('''
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
def translate_and_store(phrase):
|
141 |
"""Translates a Hebrew phrase to English using Google Translate and handles potential errors."""
|
@@ -157,15 +175,22 @@ def translate_and_store(phrase):
|
|
157 |
return "[Translation Error]"
|
158 |
|
159 |
def search_gematria_in_db(gematria_sum, max_words):
|
160 |
-
"""Searches the database for phrases with a given Gematria value and word count.
|
|
|
161 |
global conn
|
162 |
cursor = conn.cursor()
|
163 |
cursor.execute('''
|
164 |
-
SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
|
165 |
-
''', (gematria_sum,)) #
|
166 |
results = cursor.fetchall()
|
167 |
-
|
168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
def gematria_search_interface(phrase, max_words, show_translation):
|
171 |
"""The main function for the Gradio interface."""
|
@@ -179,25 +204,33 @@ def gematria_search_interface(phrase, max_words, show_translation):
|
|
179 |
phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
|
180 |
logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
|
181 |
|
|
|
|
|
|
|
|
|
182 |
# Check if Gematria is in cache
|
183 |
if phrase_gematria in gematria_cache:
|
184 |
matching_phrases = gematria_cache[phrase_gematria]
|
|
|
185 |
else:
|
186 |
# Search in the database
|
187 |
matching_phrases = search_gematria_in_db(phrase_gematria, max_words)
|
188 |
# Cache the results for future searches
|
189 |
gematria_cache[phrase_gematria] = matching_phrases
|
|
|
190 |
|
191 |
if not matching_phrases:
|
192 |
return "No matching phrases found."
|
193 |
|
194 |
# Sort results by book, chapter, and verse
|
195 |
sorted_phrases = sorted(matching_phrases, key=lambda x: (int(list(book_names.keys())[list(book_names.values()).index(x[1])]), x[2], x[3]))
|
|
|
196 |
|
197 |
# Group results by book
|
198 |
results_by_book = defaultdict(list)
|
199 |
for words, book, chapter, verse in sorted_phrases:
|
200 |
results_by_book[book].append((words, chapter, verse))
|
|
|
201 |
|
202 |
# Format results for display
|
203 |
results = []
|
@@ -265,20 +298,21 @@ def run_app():
|
|
265 |
# Pre-populate the database
|
266 |
logging.info("Starting database population...")
|
267 |
phrases_to_insert = [] # Collect phrases before inserting in bulk
|
268 |
-
for
|
269 |
-
|
270 |
-
|
|
|
|
|
|
|
|
|
271 |
insert_phrases_to_db(phrases_to_insert)
|
272 |
-
phrases_to_insert = []
|
273 |
-
if phrases_to_insert: # Insert remaining phrases
|
274 |
-
insert_phrases_to_db(phrases_to_insert)
|
275 |
logging.info("Database population complete.")
|
276 |
|
277 |
iface = gr.Interface(
|
278 |
fn=gematria_search_interface,
|
279 |
inputs=[
|
280 |
gr.Textbox(label="Enter phrase"),
|
281 |
-
gr.Number(label="Max Word Count", value=
|
282 |
gr.Checkbox(label="Show Translation", value=True)
|
283 |
],
|
284 |
outputs=gr.HTML(label="Results"),
|
|
|
21 |
# Pre-load Gematria values for common phrases to speed up search
|
22 |
gematria_cache = {}
|
23 |
|
24 |
+
# Dictionary to store translations
|
25 |
+
translation_cache = {}
|
26 |
+
|
27 |
def initialize_database():
|
28 |
"""Initializes the SQLite database."""
|
29 |
global conn
|
|
|
48 |
max_phrase_length INTEGER
|
49 |
)
|
50 |
''')
|
51 |
+
cursor.execute('''
|
52 |
+
CREATE TABLE IF NOT EXISTS translations (
|
53 |
+
hebrew_phrase TEXT PRIMARY KEY,
|
54 |
+
english_translation TEXT
|
55 |
+
)
|
56 |
+
''')
|
57 |
|
58 |
def initialize_translator():
|
59 |
"""Initializes the Google Translator."""
|
|
|
112 |
gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
|
113 |
yield gematria_sum, phrase_candidate, title, chapter_id + 1, verse_id + 1
|
114 |
|
115 |
+
# Mark the book as processed with the current max_phrase_length
|
116 |
+
cursor.execute('''
|
117 |
+
INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
|
118 |
+
VALUES (?, ?)
|
119 |
+
''', (title, max_phrase_length))
|
120 |
+
|
121 |
def insert_phrases_to_db(phrases):
|
122 |
"""Inserts a list of phrases into the database efficiently."""
|
123 |
global conn
|
|
|
134 |
|
135 |
def get_translation(phrase):
    """Retrieves or generates the English translation of a Hebrew phrase.

    Lookup order: the in-memory ``translation_cache``, then the
    ``translations`` table, and finally ``translate_and_store``. Successful
    translations are written to both the table and the in-memory cache.

    Args:
        phrase: The Hebrew phrase to translate.

    Returns:
        The English translation, or whatever placeholder
        ``translate_and_store`` returns when translation fails
        (e.g. "[Translation Error]").
    """
    global conn, translation_cache

    # Placeholder emitted by translate_and_store on failure; it must never be
    # treated as a real translation, otherwise one failed request would be
    # served from the cache forever.
    error_marker = "[Translation Error]"

    # Fast path: already resolved during this process.
    if phrase in translation_cache:
        return translation_cache[phrase]

    cursor = conn.cursor()
    cursor.execute('''
        SELECT english_translation FROM translations
        WHERE hebrew_phrase = ?
    ''', (phrase,))
    row = cursor.fetchone()
    if row and row[0] and row[0] != error_marker:
        # Remember the database hit so the next lookup skips the query.
        translation_cache[phrase] = row[0]
        return row[0]

    translation = translate_and_store(phrase)
    if not translation or translation == error_marker:
        # Do not persist failures; a later call gets the chance to retry.
        return translation

    # OR REPLACE (rather than OR IGNORE) so that an existing row holding an
    # empty value or a stale error placeholder is overwritten.
    cursor.execute('''
        INSERT OR REPLACE INTO translations (hebrew_phrase, english_translation)
        VALUES (?, ?)
    ''', (phrase, translation))
    # Commit here so the stored translation does not depend on a later commit.
    conn.commit()
    translation_cache[phrase] = translation
    return translation
|
157 |
|
158 |
def translate_and_store(phrase):
|
159 |
"""Translates a Hebrew phrase to English using Google Translate and handles potential errors."""
|
|
|
175 |
return "[Translation Error]"
|
176 |
|
177 |
def search_gematria_in_db(gematria_sum, max_words):
    """Looks up stored phrases by Gematria value, limited by word count.

    Every row of ``results`` whose ``gematria_sum`` matches is fetched first;
    the word-count limit is then applied in Python.

    Args:
        gematria_sum: The Gematria value to match.
        max_words: Upper bound (inclusive) on the number of words in a phrase.

    Returns:
        A list of ``(words, book, chapter, verse)`` tuples whose word count
        is less than or equal to ``max_words``.
    """
    global conn
    db_cursor = conn.cursor()
    db_cursor.execute('''
        SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
    ''', (gematria_sum,))

    # A phrase's word count is its number of spaces plus one.
    within_limit = [
        (words, book, chapter, verse)
        for words, book, chapter, verse in db_cursor.fetchall()
        if words.count(' ') + 1 <= max_words
    ]

    logging.debug(f"Found {len(within_limit)} matching phrases for Gematria: {gematria_sum} after filtering.")
    return within_limit
|
194 |
|
195 |
def gematria_search_interface(phrase, max_words, show_translation):
|
196 |
"""The main function for the Gradio interface."""
|
|
|
204 |
phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
|
205 |
logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
|
206 |
|
207 |
+
# Debugging output
|
208 |
+
logging.debug(f"Phrase Gematria: {phrase_gematria}")
|
209 |
+
logging.debug(f"Max Words: {max_words}")
|
210 |
+
|
211 |
# Check if Gematria is in cache
|
212 |
if phrase_gematria in gematria_cache:
|
213 |
matching_phrases = gematria_cache[phrase_gematria]
|
214 |
+
logging.debug(f"Retrieved matching phrases from cache.")
|
215 |
else:
|
216 |
# Search in the database
|
217 |
matching_phrases = search_gematria_in_db(phrase_gematria, max_words)
|
218 |
# Cache the results for future searches
|
219 |
gematria_cache[phrase_gematria] = matching_phrases
|
220 |
+
logging.debug(f"Retrieved matching phrases from database.")
|
221 |
|
222 |
if not matching_phrases:
|
223 |
return "No matching phrases found."
|
224 |
|
225 |
# Sort results by book, chapter, and verse
|
226 |
sorted_phrases = sorted(matching_phrases, key=lambda x: (int(list(book_names.keys())[list(book_names.values()).index(x[1])]), x[2], x[3]))
|
227 |
+
logging.debug(f"Sorted matching phrases: {sorted_phrases}")
|
228 |
|
229 |
# Group results by book
|
230 |
results_by_book = defaultdict(list)
|
231 |
for words, book, chapter, verse in sorted_phrases:
|
232 |
results_by_book[book].append((words, chapter, verse))
|
233 |
+
logging.debug(f"Grouped results by book: {results_by_book}")
|
234 |
|
235 |
# Format results for display
|
236 |
results = []
|
|
|
298 |
# Pre-populate the database
|
299 |
logging.info("Starting database population...")
|
300 |
phrases_to_insert = [] # Collect phrases before inserting in bulk
|
301 |
+
for max_phrase_length in range(1, 5): # Populate for phrases of 1 to 4 words (range end is exclusive)
|
302 |
+
for gematria_sum, phrase, book, chapter, verse in tqdm(populate_database(1, 39, max_phrase_length=max_phrase_length), desc=f"Populating Database (Max Length: {max_phrase_length})"): # Books 1 to 39
|
303 |
+
phrases_to_insert.append((gematria_sum, phrase, book, chapter, verse))
|
304 |
+
if len(phrases_to_insert) >= 1000: # Insert in batches of 1000 for efficiency
|
305 |
+
insert_phrases_to_db(phrases_to_insert)
|
306 |
+
phrases_to_insert = []
|
307 |
+
if phrases_to_insert: # Insert remaining phrases
|
308 |
insert_phrases_to_db(phrases_to_insert)
|
|
|
|
|
|
|
309 |
logging.info("Database population complete.")
|
310 |
|
311 |
iface = gr.Interface(
|
312 |
fn=gematria_search_interface,
|
313 |
inputs=[
|
314 |
gr.Textbox(label="Enter phrase"),
|
315 |
+
gr.Number(label="Max Word Count in Results", value=1, minimum=1, maximum=10),
|
316 |
gr.Checkbox(label="Show Translation", value=True)
|
317 |
],
|
318 |
outputs=gr.HTML(label="Results"),
|
gematria.db
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3fff2778c6444cf9818af0a203c662ca75d295b93c5921e1de8414dcaaf7650
|
3 |
+
size 101384192
|
gematria.db-journal
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:df20400802de5bf6a4cb7e49dd564de73907a76b093ce7e9c523114544c2a325
|
3 |
-
size 443744
|
|
|
|
|
|
|
|