# Page residue from the hosting site ("Spaces: Running"); not part of the program.
import json
import logging
import re
import sqlite3
from contextlib import closing

import gradio as gr
from deep_translator import GoogleTranslator, exceptions

from gematria import calculate_gematria
from util import process_json_files
# Log bare messages (no level or timestamp prefix) at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(message)s')
def flatten_text(text):
    """Flatten arbitrarily nested lists of text into one space-joined string.

    Args:
        text: A string, or a (possibly nested) list/tuple whose leaves are
            text fragments. ``None`` leaves count as empty text and any other
            non-string leaf is converted with ``str()`` so joining never fails.

    Returns:
        str: The input unchanged when it is already a string; otherwise the
        leaves joined with single spaces, in order. An empty list gives ``""``.
    """
    if isinstance(text, str):
        return text
    if isinstance(text, (list, tuple)):
        # Recursing on every item handles any nesting depth uniformly.
        return " ".join(flatten_text(item) for item in text)
    if text is None:
        return ""
    return str(text)
def initialize_database():
    """Create the ``results`` table in ``gematria.db`` if it is missing.

    The database file is opened relative to the current working directory.
    Calling this more than once is harmless because the statement uses
    ``CREATE TABLE IF NOT EXISTS``.

    The UNIQUE constraint is what lets ``insert_phrase_to_db`` detect
    duplicate phrase/location rows through ``sqlite3.IntegrityError``.
    """
    # closing() guarantees the connection is released even when the DDL
    # statement or the commit raises.
    with closing(sqlite3.connect('gematria.db')) as conn:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book INTEGER,
                title TEXT,
                chapter INTEGER,
                verse INTEGER,
                UNIQUE(gematria_sum, words, book, title, chapter, verse)
            )
        ''')
        conn.commit()
def insert_phrase_to_db(c, gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id):
    """Insert one phrase row into ``results``, ignoring duplicates.

    Args:
        c: An open ``sqlite3`` cursor. This function does not commit; the
            caller owns the transaction.
        gematria_sum: Gematria value stored for the phrase.
        phrase_candidate: The phrase text, stored in the ``words`` column.
        translation: Translation text, or ``None`` when not yet translated.
        book_id: Value for the ``book`` column.
        title: Value for the ``title`` column.
        chapter_id: Value for the ``chapter`` column.
        verse_id: Value for the ``verse`` column.

    A row that violates the table's UNIQUE constraint is logged and skipped
    rather than raised.
    """
    row = (gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id)
    # Lazy %-style arguments: this runs once per indexed phrase, so the
    # message is only formatted when the record is actually emitted.
    logging.info("Inserting: %s, %s, %s, %s, %s, %s, %s", *row)
    try:
        c.execute('''
            INSERT INTO results (gematria_sum, words, translation, book, title, chapter, verse)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        ''', row)
    except sqlite3.IntegrityError:
        logging.info(
            "Entry already exists: %s, %s, %s, %s, %s, %s",
            gematria_sum, phrase_candidate, book_id, title, chapter_id, verse_id,
        )
def populate_database(tanach_texts, max_phrase_length=1):
    """Index every phrase of up to ``max_phrase_length`` words into ``gematria.db``.

    Args:
        tanach_texts: Iterable of book dicts. Each is expected to carry a
            ``'text'`` list of chapters (each chapter a list of verses) and
            optionally a ``'title'``; books or chapters that do not match
            this shape are skipped with a warning.
        max_phrase_length: Largest number of consecutive words per phrase.

    Book, chapter and verse numbers are stored 1-based, derived from each
    item's position in its parent sequence.
    NOTE(review): the stored book number is the position within
    ``tanach_texts`` plus one, so it only matches the real book number when
    the sequence starts at book 1 -- confirm against the loader.

    All inserts share one transaction that is committed at the end; if an
    error interrupts the run nothing is committed and the connection is
    still closed.
    """
    # Anything that is not a Hebrew letter (U+05D0-U+05EA) or a space is
    # removed. Compiled once instead of once per verse.
    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")

    with closing(sqlite3.connect('gematria.db')) as conn:
        c = conn.cursor()
        for book_id, text in enumerate(tanach_texts):
            if 'text' not in text or not isinstance(text['text'], list):
                logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
                continue
            title = text.get('title', 'Unknown')
            for chapter_id, chapter in enumerate(text['text']):
                if not isinstance(chapter, list):
                    logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
                    continue
                for verse_id, verse in enumerate(chapter):
                    verse_text = flatten_text(verse)
                    if not isinstance(verse_text, str):
                        # A non-text verse would make the regex raise TypeError.
                        logging.warning(
                            f"Skipping verse {verse_id} in chapter {chapter_id} of book {title}: not text."
                        )
                        continue
                    # split() already collapses runs of spaces, so no
                    # separate whitespace normalisation is needed.
                    words = non_hebrew.sub("", verse_text).split()
                    max_length = min(max_phrase_length, len(words))
                    for length in range(1, max_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase_candidate = " ".join(words[start:start + length])
                            gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
                            # No translation initially; it is filled in
                            # lazily the first time the phrase is shown.
                            insert_phrase_to_db(
                                c, gematria_sum, phrase_candidate, None,
                                book_id + 1, title, chapter_id + 1, verse_id + 1,
                            )
        conn.commit()
def get_translation_from_db(c, phrase, book, chapter, verse):
    """Look up the stored translation for a phrase at a given location.

    Args:
        c: An open ``sqlite3`` cursor on the database holding ``results``.
        phrase: Phrase text to match against the ``words`` column.
        book: Value to match against the ``book`` column.
        chapter: Value to match against the ``chapter`` column.
        verse: Value to match against the ``verse`` column.

    Returns:
        The ``translation`` value of the first matching row (which is
        ``None`` when the stored translation is NULL), or ``None`` when no
        row matches.
    """
    c.execute('''
        SELECT translation FROM results
        WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
    ''', (phrase, book, chapter, verse))
    row = c.fetchone()
    if not row:
        return None
    return row[0]
def translate_and_store(conn, phrase, book, chapter, verse):
    """Translate a phrase to English and cache the result in ``results``.

    Args:
        conn: An open ``sqlite3`` connection; the update is committed on it.
        phrase: Phrase text to translate; also matched against ``words``.
        book: Value matched against the ``book`` column.
        chapter: Value matched against the ``chapter`` column.
        verse: Value matched against the ``verse`` column.

    Returns:
        The translation returned by the translator, or the literal string
        ``"[Translation Error]"`` when the translator raises one of the
        handled ``deep_translator`` exceptions. Nothing is stored in the
        error case, so a later call will retry.
    """
    translator = GoogleTranslator(source='iw', target='en')  # Explicitly set source to Hebrew
    # Only the translator call sits in the try block, so the handler covers
    # exactly the exceptions it names and nothing from the database code.
    try:
        translation = translator.translate(phrase)
    except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
            exceptions.ServerException, exceptions.RequestError) as e:
        logging.error("Error translating phrase '%s': %s", phrase, e)
        return "[Translation Error]"

    logging.info("Translated phrase: %s", translation)
    c = conn.cursor()
    c.execute('''
        UPDATE results
        SET translation = ?
        WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
    ''', (translation, phrase, book, chapter, verse))
    conn.commit()
    return translation
def gematria_search_interface(phrase):
    """Find stored phrases whose gematria equals that of ``phrase``.

    This is the callback wired into the Gradio interface.

    Args:
        phrase: The user's search phrase. Spaces are removed before the
            gematria value is computed.

    Returns:
        tuple[str, str]: ``(results_text, debug_text)``. ``results_text`` is
        a prompt when the phrase is empty, a "no match" message, or one
        formatted entry per match ordered by book, chapter and verse.
        ``debug_text`` is the newline-joined debug messages collected
        during the call.

    Matches without a stored translation are translated on the fly and the
    translation is saved back to the database.
    """
    debug_output = []

    def debug_callback(message):
        # Keep the message for the UI's debug box and mirror it to the log.
        debug_output.append(message)
        logging.info(message)

    # `not phrase` also covers None, which would otherwise fail on .strip().
    if not phrase or not phrase.strip():
        return "Please enter a phrase.", "\n".join(debug_output)

    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
    debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")

    # closing() releases the connection on every exit path, including an
    # exception raised while translating or updating a row.
    with closing(sqlite3.connect('gematria.db')) as conn:
        matching_phrases = search_gematria_in_db(conn.cursor(), phrase_gematria)
        if not matching_phrases:
            return "No matching phrases found.", "\n".join(debug_output)

        # Rows are (words, book, title, chapter, verse, translation):
        # sort by book, then chapter, then verse.
        matching_phrases.sort(key=lambda x: (x[1], x[3], x[4]))

        parts = ["Matching phrases:\n"]
        for match in matching_phrases:
            if len(match) != 6:
                debug_callback(f"Error: Expected tuple of length 6, but got {len(match)}: {match}")
                continue
            words, book, title, chapter, verse, translation = match
            if not translation:
                # Not translated yet: fetch it now and cache it in the DB.
                translation = translate_and_store(conn, words, book, chapter, verse)
            parts.append(
                f"Book: {title} ({book})\nChapter: {chapter}, Verse: {verse}\n"
                f"Phrase: {words}\nTranslation: {translation}\n\n"
            )

    return "".join(parts), "\n".join(debug_output)
def search_gematria_in_db(c, gematria_sum):
    """Return every stored phrase with the given gematria value.

    Args:
        c: An open ``sqlite3`` cursor on the database holding ``results``.
        gematria_sum: The gematria value to match.

    Returns:
        list[tuple]: Rows of ``(words, book, title, chapter, verse,
        translation)``; empty when nothing matches. The query has no
        ORDER BY, so callers must sort if they need a particular order.
    """
    c.execute('''
        SELECT words, book, title, chapter, verse, translation FROM results WHERE gematria_sum = ?
    ''', (gematria_sum,))
    results = c.fetchall()
    # Lazy formatting: the result list can be large.
    logging.info("Search results: %s", results)
    return results
def run_test():
    """Load the texts and populate the database with single-word phrases.

    Despite the name, this performs no assertions: it loads texts with
    ``process_json_files(1, 39)`` and indexes them through
    ``populate_database``.
    NOTE(review): the arguments presumably select books 1 through 39 --
    confirm against ``process_json_files``.
    """
    test_texts = process_json_files(1, 39)
    # Raise max_phrase_length (e.g. to 2 or 3) to also index multi-word
    # phrases; every extra word multiplies the number of stored rows.
    populate_database(test_texts, max_phrase_length=1)
# Gradio UI: one text input, two text outputs that match the
# (results, debug) tuple returned by gematria_search_interface.
iface = gr.Interface(
    fn=gematria_search_interface,
    inputs=gr.Textbox(label="Enter phrase"),
    outputs=[gr.Textbox(label="Results"), gr.Textbox(label="Debug Output")],
    title="Gematria Search in Tanach",
    description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
    live=False,  # Disable live update
    allow_flagging="never",  # Disable flagging
)
if __name__ == "__main__":
    # Order matters: the table must exist before it is populated, and the
    # data must be loaded before the UI starts serving searches.
    initialize_database()
    run_test()  # Populates the database (no assertions are run)
    iface.launch()