"""Gradio app that looks up indexed Tanach phrases by their gematria value.

Phrases are stored in a local SQLite database; English translations are
fetched on demand through Google Translate and cached in the same table.
"""

import json
import logging
import re
import sqlite3
from contextlib import closing

import gradio as gr
from deep_translator import GoogleTranslator, exceptions

from gematria import calculate_gematria
from util import process_json_files

logging.basicConfig(level=logging.INFO, format='%(message)s')

# Single source of truth for the SQLite file used by every function below.
DB_PATH = 'gematria.db'

# Matches runs of anything that is neither a Hebrew letter (U+05D0-U+05EA)
# nor a space. Compiled once because it is applied to every verse.
_NON_HEBREW_RE = re.compile(r"[^\u05D0-\u05EA ]+")


def flatten_text(text):
    """Join an arbitrarily nested list of strings into one space-separated string.

    Any value that is not a list is returned unchanged.
    """
    if isinstance(text, list):
        # The recursive call returns non-list items as-is, so no per-item
        # type check is needed here.
        return " ".join(flatten_text(item) for item in text)
    return text


def initialize_database():
    """Create the ``results`` table in the database if it does not exist yet."""
    # closing() guarantees the connection is released even if the DDL fails;
    # the inner ``conn`` context commits on success and rolls back on error.
    with closing(sqlite3.connect(DB_PATH)) as conn, conn:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book INTEGER,
                title TEXT,
                chapter INTEGER,
                verse INTEGER,
                UNIQUE(gematria_sum, words, book, title, chapter, verse)
            )
        ''')


def insert_phrase_to_db(c, gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id):
    """Insert one phrase row through cursor *c*.

    A row that violates the table's UNIQUE constraint is logged and skipped
    rather than raised. The caller is responsible for committing.
    """
    try:
        # Lazy %-formatting: this runs once per indexed phrase.
        logging.info(
            "Inserting: %s, %s, %s, %s, %s, %s, %s",
            gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id,
        )
        c.execute('''
            INSERT INTO results (gematria_sum, words, translation, book, title, chapter, verse)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        ''', (gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id))
    except sqlite3.IntegrityError:
        logging.info(
            "Entry already exists: %s, %s, %s, %s, %s, %s",
            gematria_sum, phrase_candidate, book_id, title, chapter_id, verse_id,
        )


def _iter_phrases(words, max_phrase_length):
    """Yield every run of 1..max_phrase_length consecutive words, shortest runs first."""
    longest = min(max_phrase_length, len(words))
    for length in range(1, longest + 1):
        for start in range(len(words) - length + 1):
            yield " ".join(words[start:start + length])


def populate_database(tanach_texts, max_phrase_length=1):
    """Index every phrase of up to *max_phrase_length* words from *tanach_texts*.

    Args:
        tanach_texts: iterable of book mappings. Each book needs a ``'text'``
            entry holding a list of chapters (each a list of verses) and may
            carry a ``'title'``. Malformed books, chapters and verses are
            skipped with a warning.
        max_phrase_length: maximum number of consecutive words per phrase.

    Book, chapter and verse numbers are stored 1-based. Rows are inserted
    without a translation. Everything is committed in one transaction at the
    end; if an error interrupts the run nothing is committed.
    """
    with closing(sqlite3.connect(DB_PATH)) as conn, conn:
        c = conn.cursor()
        for book_id, text in enumerate(tanach_texts):
            if 'text' not in text or not isinstance(text['text'], list):
                logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
                continue

            title = text.get('title', 'Unknown')
            for chapter_id, chapter in enumerate(text['text']):
                if not isinstance(chapter, list):
                    logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
                    continue

                for verse_id, verse in enumerate(chapter):
                    verse_text = flatten_text(verse)
                    if not isinstance(verse_text, str):
                        # Same policy as for malformed books/chapters: skip
                        # instead of crashing inside the regex below.
                        logging.warning(
                            f"Skipping verse {verse_id} in chapter {chapter_id} "
                            f"of book {title} due to invalid format."
                        )
                        continue

                    # Keep only Hebrew letters and spaces; split() already
                    # collapses runs of spaces, so no extra normalisation pass.
                    words = _NON_HEBREW_RE.sub("", verse_text).split()
                    for phrase_candidate in _iter_phrases(words, max_phrase_length):
                        gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
                        insert_phrase_to_db(
                            c, gematria_sum, phrase_candidate, None,  # no translation initially
                            book_id + 1, title, chapter_id + 1, verse_id + 1,
                        )


def get_translation_from_db(c, phrase, book, chapter, verse):
    """Return the stored translation for a phrase at a location, or None if no row matches."""
    c.execute('''
        SELECT translation FROM results
        WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
    ''', (phrase, book, chapter, verse))
    result = c.fetchone()
    return result[0] if result else None


def translate_and_store(conn, phrase, book, chapter, verse):
    """Translate *phrase* to English and cache the result on its database row.

    Returns the translation, or the literal ``"[Translation Error]"`` when the
    translator raises one of the handled deep_translator exceptions (in which
    case nothing is written to the database).
    """
    translator = GoogleTranslator(source='iw', target='en')  # explicitly set source to Hebrew
    c = conn.cursor()
    try:
        translation = translator.translate(phrase)
        logging.info(f"Translated phrase: {translation}")
        c.execute('''
            UPDATE results
            SET translation = ?
            WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
        ''', (translation, phrase, book, chapter, verse))
        conn.commit()
        return translation
    except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
            exceptions.ServerException, exceptions.RequestError) as e:
        logging.error(f"Error translating phrase '{phrase}': {e}")
        return "[Translation Error]"


def search_gematria_in_db(c, gematria_sum):
    """Return all rows whose gematria equals *gematria_sum*.

    Each row is ``(words, book, title, chapter, verse, translation)``.
    """
    c.execute('''
        SELECT words, book, title, chapter, verse, translation FROM results
        WHERE gematria_sum = ?
    ''', (gematria_sum,))
    results = c.fetchall()
    logging.info(f"Search results: {results}")
    return results


def gematria_search_interface(phrase):
    """Gradio callback: list indexed phrases sharing the gematria of *phrase*.

    Returns a ``(results_text, debug_text)`` pair. Matches that have no stored
    translation are translated on the fly and the translation is cached.
    """
    debug_output = []

    def debug_callback(message):
        debug_output.append(message)
        logging.info(message)

    # Treat None like an empty string instead of failing on .strip().
    if not phrase or not phrase.strip():
        return "Please enter a phrase.", "\n".join(debug_output)

    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
    debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")

    # closing() releases the connection on every exit path, including errors
    # raised while searching or translating.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        matching_phrases = search_gematria_in_db(conn.cursor(), phrase_gematria)
        if not matching_phrases:
            return "No matching phrases found.", "\n".join(debug_output)

        # Sort matching phrases by book, chapter, and verse.
        matching_phrases.sort(key=lambda row: (row[1], row[3], row[4]))

        # Collect pieces and join once rather than growing a string in a loop.
        parts = ["Matching phrases:\n"]
        for match in matching_phrases:
            if len(match) != 6:
                debug_callback(f"Error: Expected tuple of length 6, but got {len(match)}: {match}")
                continue
            words, book, title, chapter, verse, translation = match
            if not translation:
                translation = translate_and_store(conn, words, book, chapter, verse)
            parts.append(
                f"Book: {title} ({book})\nChapter: {chapter}, Verse: {verse}\n"
                f"Phrase: {words}\nTranslation: {translation}\n\n"
            )

    return "".join(parts), "\n".join(debug_output)


def run_test():
    """Populate the database with single-word phrases before the UI starts.

    NOTE(review): ``process_json_files(1, 39)`` presumably loads books 1
    through 39 - confirm against ``util.process_json_files``. Raise
    ``max_phrase_length`` to index longer phrases as well.
    """
    test_texts = process_json_files(1, 39)
    populate_database(test_texts, max_phrase_length=1)


iface = gr.Interface(
    fn=gematria_search_interface,
    inputs=gr.Textbox(label="Enter phrase"),
    outputs=[gr.Textbox(label="Results"), gr.Textbox(label="Debug Output")],
    title="Gematria Search in Tanach",
    description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
    live=False,  # disable live update
    allow_flagging="never",  # disable flagging
)

if __name__ == "__main__":
    initialize_database()
    run_test()
    iface.launch()