gematria_date_sums

Running

File size: 7,715 Bytes

import gradio as gr
import json
import re
import sqlite3
import logging
from util import process_json_files
from gematria import calculate_gematria
from deep_translator import GoogleTranslator, exceptions

# Root logger: INFO and above, printing only the message text (no level or timestamp prefix).
logging.basicConfig(level=logging.INFO, format='%(message)s')

def flatten_text(text):
    """Collapse a (possibly nested) list of strings into one space-joined string.

    A value that is not a list is returned unchanged. Nested lists are
    flattened recursively, and every level is joined with a single space.
    """
    if not isinstance(text, list):
        return text
    # Non-list entries come back untouched from the recursive call, so every
    # entry can be routed through it uniformly.
    pieces = [flatten_text(entry) for entry in text]
    return " ".join(pieces)

def initialize_database(db_path='gematria.db'):
    """Create the ``results`` table if it does not exist yet.

    Args:
        db_path: Path of the SQLite database file to open (and create if
            missing). Defaults to ``'gematria.db'``.

    The table stores one row per phrase occurrence; the UNIQUE constraint
    rejects a second row with the same sum, words, book, title, chapter and
    verse. The connection is closed even if table creation raises.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book INTEGER,
                title TEXT,
                chapter INTEGER,
                verse INTEGER,
                UNIQUE(gematria_sum, words, book, title, chapter, verse)
            )
        ''')
        conn.commit()
    finally:
        # Release the file handle on both the success and the failure path.
        conn.close()

def insert_phrase_to_db(c, gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id):
    """Insert one phrase occurrence through cursor ``c``, ignoring duplicates.

    The attempt is logged first. If the insert raises
    ``sqlite3.IntegrityError`` the row is treated as already present and only
    a log line is written. Nothing is committed here; that is left to the
    caller.
    """
    row = (gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id)
    logging.info(f"Inserting: {gematria_sum}, {phrase_candidate}, {translation}, {book_id}, {title}, {chapter_id}, {verse_id}")
    try:
        c.execute('''
            INSERT INTO results (gematria_sum, words, translation, book, title, chapter, verse)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        ''', row)
    except sqlite3.IntegrityError:
        logging.info(f"Entry already exists: {gematria_sum}, {phrase_candidate}, {book_id}, {title}, {chapter_id}, {verse_id}")

def populate_database(tanach_texts, max_phrase_length=1):
    """Index word phrases of the given books into the ``results`` table.

    Every verse is flattened to a string, stripped of everything except the
    characters U+05D0-U+05EA and spaces, and split into words. Each run of
    1..``max_phrase_length`` consecutive words is stored together with its
    gematria sum and 1-based book, chapter and verse numbers. Translations
    are left empty (``None``).

    Args:
        tanach_texts: Iterable of book dicts. A book needs a ``'text'`` list
            of chapters, each chapter being a list of verses; ``'title'`` is
            optional and defaults to ``'Unknown'``. Malformed books,
            chapters and verses are skipped with a warning.
        max_phrase_length: Largest number of consecutive words per phrase.

    All inserts are committed once at the end. The connection is closed even
    if processing raises, in which case nothing is committed.
    """
    # Compiled once instead of being looked up for every verse.
    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")

    conn = sqlite3.connect('gematria.db')
    try:
        c = conn.cursor()
        for book_id, text in enumerate(tanach_texts):
            if 'text' not in text or not isinstance(text['text'], list):
                logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
                continue
            title = text.get('title', 'Unknown')
            chapters = text['text']
            for chapter_id, chapter in enumerate(chapters):
                if not isinstance(chapter, list):
                    logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
                    continue
                for verse_id, verse in enumerate(chapter):
                    verse_text = flatten_text(verse)
                    if not isinstance(verse_text, str):
                        # flatten_text passes non-list values through, so a
                        # None/number verse would otherwise crash the regex.
                        logging.warning(f"Skipping verse {verse_id} in chapter {chapter_id} of book {title} due to invalid format.")
                        continue
                    # split() already collapses runs of spaces, so no
                    # separate whitespace normalisation is needed.
                    words = non_hebrew.sub("", verse_text).split()
                    max_length = min(max_phrase_length, len(words))
                    for length in range(1, max_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase_words = words[start:start + length]
                            phrase_candidate = " ".join(phrase_words)
                            gematria_sum = calculate_gematria("".join(phrase_words))
                            # No translation initially; it is filled in on demand.
                            insert_phrase_to_db(c, gematria_sum, phrase_candidate, None, book_id + 1, title, chapter_id + 1, verse_id + 1)
        conn.commit()
    finally:
        conn.close()

def get_translation_from_db(c, phrase, book, chapter, verse):
    """Fetch the stored translation of one phrase occurrence via cursor ``c``.

    Returns the ``translation`` column of the first row matching the phrase,
    book, chapter and verse, or ``None`` when no such row exists (a stored
    NULL translation also comes back as ``None``).
    """
    lookup_key = (phrase, book, chapter, verse)
    c.execute('''
        SELECT translation FROM results 
        WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
    ''', lookup_key)
    row = c.fetchone()
    if not row:
        return None
    return row[0]

def translate_and_store(conn, phrase, book, chapter, verse):
    """Translate ``phrase`` to English and save it on the matching rows.

    On success the translation is written to every ``results`` row with the
    same words, book, chapter and verse, the change is committed on ``conn``,
    and the translation is returned. If the translator raises one of the
    handled deep_translator errors, the error is logged, nothing is stored,
    and the placeholder ``"[Translation Error]"`` is returned instead.
    """
    translator = GoogleTranslator(source='iw', target='en')  # 'iw' pins the source language to Hebrew
    c = conn.cursor()
    try:
        translation = translator.translate(phrase)
    except (exceptions.TranslationNotFound, exceptions.NotValidPayload, 
            exceptions.ServerException, exceptions.RequestError) as e:
        logging.error(f"Error translating phrase '{phrase}': {e}")
        return "[Translation Error]"

    # Only reached when the translation call succeeded.
    logging.info(f"Translated phrase: {translation}")
    update_args = (translation, phrase, book, chapter, verse)
    c.execute('''
            UPDATE results 
            SET translation = ?
            WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
        ''', update_args)
    conn.commit()
    return translation

def gematria_search_interface(phrase):
    """Find stored phrases whose gematria sum equals that of ``phrase``.

    Args:
        phrase: Text entered by the user. ``None``, empty and
            whitespace-only input is rejected with a prompt message.

    Returns:
        A ``(result_text, debug_text)`` tuple of strings. ``result_text`` is
        either a status message or a listing of the matches sorted by book,
        chapter and verse; ``debug_text`` holds the collected debug lines
        joined by newlines.

    Matches without a stored translation are translated and saved on the
    fly. The database connection is closed on every exit path, including
    when the search or a translation raises.
    """
    debug_output = []

    def debug_callback(message):
        # Keep debug lines for the UI and mirror them to the log.
        debug_output.append(message)
        logging.info(message)

    if not phrase or not phrase.strip():
        return "Please enter a phrase.", "\n".join(debug_output)

    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
    debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")

    conn = sqlite3.connect('gematria.db')
    try:
        c = conn.cursor()
        matching_phrases = search_gematria_in_db(c, phrase_gematria)

        if not matching_phrases:
            return "No matching phrases found.", "\n".join(debug_output)

        # Sort matching phrases by book, chapter, and verse
        matching_phrases.sort(key=lambda x: (x[1], x[3], x[4]))

        # Collect the pieces and join once instead of repeated string +=.
        parts = ["Matching phrases:\n"]
        for match in matching_phrases:
            if len(match) != 6:  # words, book, title, chapter, verse, translation
                debug_callback(f"Error: Expected tuple of length 6, but got {len(match)}: {match}")
                continue
            words, book, title, chapter, verse, translation = match
            if not translation:  # Not translated yet: translate now and cache in the DB
                translation = translate_and_store(conn, words, book, chapter, verse)
            parts.append(f"Book: {title} ({book})\nChapter: {chapter}, Verse: {verse}\nPhrase: {words}\nTranslation: {translation}\n\n")

        return "".join(parts), "\n".join(debug_output)
    finally:
        conn.close()

def search_gematria_in_db(c, gematria_sum):
    """Return all stored phrases with the given gematria sum.

    Runs the lookup on cursor ``c`` and returns a list of
    ``(words, book, title, chapter, verse, translation)`` tuples, which is
    empty when nothing matches. The full result list is also logged.
    """
    params = (gematria_sum,)
    c.execute('''
        SELECT words, book, title, chapter, verse, translation FROM results WHERE gematria_sum = ?
    ''', params)
    found = c.fetchall()
    logging.info(f"Search results: {found}")
    return found

def run_test():
    """Load the source texts and index their single-word phrases.

    Despite its name, this function asserts nothing: it loads texts via
    ``process_json_files`` and hands them to ``populate_database`` so that
    searches have data to match against.
    """
    # NOTE(review): the arguments presumably select books 1 through 39 - confirm against util.process_json_files.
    test_texts = process_json_files(1, 39)
    # Only single words are indexed; a larger max_phrase_length would also
    # index multi-word phrases, adding more rows per verse.
    populate_database(test_texts, max_phrase_length=1)

# Gradio UI definition: one text input is passed to gematria_search_interface,
# and its two returned strings fill the "Results" and "Debug Output" boxes in
# that order.
iface = gr.Interface(
    fn=gematria_search_interface,
    inputs=gr.Textbox(label="Enter phrase"),
    outputs=[gr.Textbox(label="Results"), gr.Textbox(label="Debug Output")],
    title="Gematria Search in Tanach",
    description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
    live=False,  # Disable live update
    allow_flagging="never"  # Disable flagging
)

if __name__ == "__main__":
    initialize_database()  # Make sure the results table exists
    run_test()  # Loads the texts and populates the database (runs on every start)
    iface.launch()  # Start the Gradio interface