"""Gradio app for gematria search in the Tanach.

Builds a SQLite database (gematria.db) of Hebrew phrases and their gematria
values, then lets the user search for phrases whose gematria equals that of an
entered phrase.
"""

import gradio as gr
import json
import re
import sqlite3
import logging

from util import process_json_files
from gematria import calculate_gematria
from deep_translator import GoogleTranslator

logging.basicConfig(level=logging.INFO, format='%(message)s')


def flatten_text(text):
    """Flatten nested lists of strings into a single space-joined string."""
    if isinstance(text, list):
        return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
    return text


def initialize_database():
    """Create the results table in gematria.db if it does not already exist."""
    conn = sqlite3.connect('gematria.db')
    c = conn.cursor()
    c.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            book INTEGER,
            title TEXT,
            chapter INTEGER,
            verse INTEGER
        )
    ''')
    conn.commit()
    conn.close()


def insert_phrase_to_db(c, gematria_sum, phrase_candidate, book_id, title, chapter_id, verse_id):
    """Insert one phrase and its gematria value, converting chapter/verse to 1-based indices."""
    logging.info(f"Inserting: {gematria_sum}, {phrase_candidate}, {book_id}, {title}, {chapter_id + 1}, {verse_id + 1}")
    c.execute('''
        INSERT INTO results (gematria_sum, words, book, title, chapter, verse)
        VALUES (?, ?, ?, ?, ?, ?)
    ''', (gematria_sum, phrase_candidate, book_id, title, chapter_id + 1, verse_id + 1))


def populate_database(tanach_texts, max_phrase_length=1):
    """Store every phrase of up to max_phrase_length words, per chapter, with its gematria sum.

    Non-Hebrew characters are stripped before each chapter is split into words.
    Note that the 'verse' column records the phrase's starting word position
    within the chapter, not an actual verse number.
    """
    conn = sqlite3.connect('gematria.db')
    c = conn.cursor()
    for book_id, text in enumerate(tanach_texts):
        if not isinstance(text.get('text'), list):
            continue
        title = text.get('title', 'Unknown')
        chapters = text['text']
        for chapter_id, chapter in enumerate(chapters):
            if not isinstance(chapter, list):
                continue
            chapter_text = ' '.join([flatten_text(verse) for verse in chapter])
            chapter_text = re.sub(r"[^\u05D0-\u05EA ]+", "", chapter_text)  # keep only Hebrew letters and spaces
            chapter_text = re.sub(r" +", " ", chapter_text)
            words = chapter_text.split()
            max_length = min(max_phrase_length, len(words))
            for length in range(1, max_length + 1):
                for start in range(len(words) - length + 1):
                    phrase_candidate = " ".join(words[start:start + length])
                    gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
                    insert_phrase_to_db(c, gematria_sum, phrase_candidate, book_id, title, chapter_id, start + 1)
    conn.commit()
    conn.close()


def search_gematria_in_db(gematria_sum):
    """Return all stored phrases whose gematria sum equals the given value."""
    conn = sqlite3.connect('gematria.db')
    c = conn.cursor()
    c.execute('''
        SELECT words, book, title, chapter, verse FROM results WHERE gematria_sum = ?
    ''', (gematria_sum,))
    results = c.fetchall()
    conn.close()
    return results


def translate_phrases(phrases):
    """Translate a list of phrases into English via Google Translate."""
    translator = GoogleTranslator(source='auto', target='en')
    translated_phrases = []
    for phrase in phrases:
        translated_phrases.append(translator.translate(phrase))
    return translated_phrases


def db(tanach_texts, max_phrase_length=1):
    """Create and populate the gematria database."""
    initialize_database()
    populate_database(tanach_texts, max_phrase_length)
    logging.info("Database successfully created and populated.")


def gematria_search_interface(phrase):
    """Gradio callback: return matching phrases (with translations) and a debug log."""
    debug_output = []

    def debug_callback(message):
        debug_output.append(message)
        logging.info(message)

    if not phrase.strip():
        return "Please enter a phrase.", "\n".join(debug_output)

    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
    debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")

    matching_phrases = search_gematria_in_db(phrase_gematria)
    if not matching_phrases:
        return "No matching phrases found.", "\n".join(debug_output)

    phrases = [match[0] for match in matching_phrases]
    translations = translate_phrases(phrases)

    result = "Matching phrases:\n"
    for match, translation in zip(matching_phrases, translations):
        # match is (words, book, title, chapter, verse)
        result += (
            f"Book: {match[1]} ({match[2]})\n"
            f"Chapter: {match[3]}, Verse: {match[4]}\n"
            f"Phrase: {match[0]}\n"
            f"Translation: {translation}\n\n"
        )

    return result, "\n".join(debug_output)


def run_test():
    debug_output = []
    test_phrase = "אבגדהוזחטיכלמנסעפצקרשת"
    expected_gematria = 1495

    def debug_callback(message):
        debug_output.append(message)
        logging.info(message)

    # Load the test JSON contents for 00.json
    test_texts_00 = process_json_files(0, 0)
    db(test_texts_00, max_phrase_length=22)  # Populate the database with phrases of up to 22 words
    matching_phrases_00 = search_gematria_in_db(expected_gematria)
    assert matching_phrases_00, "No matching phrases found in 00.json."
    assert matching_phrases_00[0][0].replace(" ", "") == test_phrase, \
        f"Found phrase does not match: {matching_phrases_00[0][0]}"
    print("Test successful: The phrase was correctly found and the gematria matches in 00.json.")

    # Load the test JSON contents for 01.json
    test_texts_01 = process_json_files(1, 1)
    db(test_texts_01, max_phrase_length=2)  # Populate the database with phrases of up to 2 words
    search_phrase_01 = "אתקלך שמעתי"
    expected_gematria_01 = calculate_gematria(search_phrase_01.replace(" ", ""))
    matching_phrases_01 = search_gematria_in_db(expected_gematria_01)
    assert matching_phrases_01, "No matching phrases found in 01.json."
    assert matching_phrases_01[0][0].replace(" ", "") == search_phrase_01.replace(" ", ""), \
        f"Found phrase does not match: {matching_phrases_01[0][0]}"
    print("Test successful: The phrase was correctly found and the gematria matches in 01.json.")

    print("\n".join(debug_output))


iface = gr.Interface(
    fn=gematria_search_interface,
    inputs=gr.Textbox(label="Enter phrase"),
    outputs=[gr.Textbox(label="Results"), gr.Textbox(label="Debug Output")],
    title="Gematria Search in Tanach",
    description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
    live=False,  # Disable live update
    allow_flagging="never",  # Disable flagging for simplicity
)

if __name__ == "__main__":
    run_test()  # Run tests
    iface.launch()