gematria_date_sums

Running

File size: 12,434 Bytes

import html
import json
import logging
import re
import sqlite3
from collections import defaultdict
from contextlib import closing
from typing import Tuple, Dict, List
from urllib.parse import quote_plus

import gradio as gr
from deep_translator import GoogleTranslator, exceptions
from tqdm import tqdm

from gematria import calculate_gematria
from util import process_json_files

# Constants
DATABASE_FILE = 'gematria.db'  # Path of the SQLite file used by the database helpers in this module
MAX_PHRASE_LENGTH_LIMIT = 20  # Upper bound of the phrase lengths populated at startup and of the UI slider
BATCH_SIZE = 10000  # NOTE(review): not referenced in the code visible in this file

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Global variables
conn: sqlite3.Connection = None  # Assigned by initialize_database()
translator: GoogleTranslator = None  # Assigned by initialize_translator()
book_names: Dict[int, str] = {}  # book id -> book title, filled in by process_book()
# (gematria value, max words) -> rows returned by search_gematria_in_db()
gematria_cache: Dict[Tuple[int, int], List[Tuple[str, str, int, int, int, str]]] = {}
translation_cache: Dict[str, str] = {}  # phrase -> translation, filled in by get_translation()
total_word_count: int = 0  # Running word count used by process_book() to number word positions


def initialize_database() -> None:
    """Open the module-wide SQLite connection and make sure the schema exists.

    Binds the global ``conn`` to a connection on ``DATABASE_FILE`` and then
    creates, if they are not already present:

    * the ``results`` table (one row per phrase occurrence),
    * an index on ``results.gematria_sum``,
    * the ``processed_books`` table (book title -> ``max_phrase_length``).

    The statements are committed before returning.
    """
    global conn

    results_table_sql = '''
    CREATE TABLE IF NOT EXISTS results (
        gematria_sum INTEGER,
        words TEXT,
        translation TEXT,
        book TEXT,
        chapter INTEGER,
        verse INTEGER,
        phrase_length INTEGER,
        word_position TEXT, 
        PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)  -- Primary key constraint
    )
    '''
    gematria_index_sql = '''
    CREATE INDEX IF NOT EXISTS idx_results_gematria
    ON results (gematria_sum)
    '''
    processed_books_sql = '''
    CREATE TABLE IF NOT EXISTS processed_books (
        book TEXT PRIMARY KEY,
        max_phrase_length INTEGER
    )
    '''

    conn = sqlite3.connect(DATABASE_FILE)
    schema_cursor = conn.cursor()
    # Order matters: the index refers to the results table.
    for ddl in (results_table_sql, gematria_index_sql, processed_books_sql):
        schema_cursor.execute(ddl)
    conn.commit()


def initialize_translator() -> None:
    """Create the shared translator and bind it to the global ``translator``.

    The instance is configured with source language code ``'iw'`` and target
    language code ``'en'``; a log line confirms the setup.
    """
    global translator
    hebrew_to_english = GoogleTranslator(source='iw', target='en')
    translator = hebrew_to_english
    logging.info("Translator initialized.")


def process_book(book_id: int, max_phrase_length: int, cursor) -> List[Tuple]:
    """Build the rows to insert into ``results`` for a single book.

    Every run of 1..``max_phrase_length`` consecutive words inside a verse
    becomes one row. Word positions are numbered with the module-level
    ``total_word_count``, which this function advances by the number of
    words in the book.

    Args:
        book_id: Identifier passed to ``process_json_files`` and used as the
            key of the returned mapping.
        max_phrase_length: Longest phrase, in words, to generate.
        cursor: Open SQLite cursor, used to read ``processed_books``.

    Returns:
        A list of tuples ``(gematria_sum, words, None, title, chapter, verse,
        phrase_length, word_position)`` with 1-based chapter and verse
        numbers. The list is empty when the book is missing, malformed, or
        already processed with at least ``max_phrase_length``.

    Side effects:
        Records the title in ``book_names`` and advances
        ``total_word_count``.
    """
    global book_names, total_word_count
    phrases_to_insert = []

    loaded_books = process_json_files(book_id, book_id)
    if book_id not in loaded_books:
        return phrases_to_insert
    book_data = loaded_books[book_id]

    if 'title' not in book_data or not isinstance(book_data['title'], str):
        logging.warning(f"Skipping book {book_id} due to missing 'title' field.")
        return phrases_to_insert

    title = book_data['title']
    book_names[book_id] = title
    has_text = 'text' in book_data and isinstance(book_data['text'], list)

    # Check if this book has already been processed for this phrase length
    cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (title,))
    result = cursor.fetchone()
    if result is not None and result[0] is not None and result[0] >= max_phrase_length:
        logging.info(f"Skipping book {title}: Already processed with max_phrase_length {result[0]}")
        if has_text:
            # Still advance the running counter. Otherwise the books that
            # follow in this pass would be numbered as if this one did not
            # exist, and (word_position being part of the primary key)
            # would be inserted again under wrong positions.
            total_word_count += sum(
                len(_extract_hebrew_words(verse))
                for chapter in book_data['text']
                for verse in chapter
            )
        return phrases_to_insert

    if not has_text:
        logging.warning(f"Skipping book {book_id} due to missing 'text' field.")
        return phrases_to_insert

    for chapter_number, chapter in enumerate(book_data['text'], start=1):
        for verse_number, verse in enumerate(chapter, start=1):
            words = _extract_hebrew_words(verse)

            for length in range(1, max_phrase_length + 1):
                # The range is empty when the verse is shorter than `length`.
                for start in range(len(words) - length + 1):
                    phrase_candidate = " ".join(words[start:start + length])
                    gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))

                    first_word = total_word_count + start + 1
                    last_word = total_word_count + start + length
                    phrases_to_insert.append(
                        (gematria_sum, phrase_candidate, None, title, chapter_number, verse_number,
                         length, f"{first_word}-{last_word}"))

            total_word_count += len(words)

    return phrases_to_insert


# Compiled once at import time instead of once per verse.
_BRACKETED_SPAN_RE = re.compile(r'\[.*?\]')
_NON_HEBREW_RE = re.compile(r"[^\u05D0-\u05EA ]+")


def _extract_hebrew_words(verse) -> List[str]:
    """Return a verse's words after removing bracketed spans and every
    character other than the letters U+05D0..U+05EA and spaces."""
    verse_text = flatten_text(verse)
    verse_text = _BRACKETED_SPAN_RE.sub('', verse_text)
    verse_text = _NON_HEBREW_RE.sub('', verse_text)
    # split() without arguments already collapses runs of spaces.
    return verse_text.split()


def populate_database(start_book: int, end_book: int, max_phrase_length: int = 1) -> None:
    """Populates the database with phrases from the Tanach.

    Each book id in ``start_book..end_book`` (inclusive) is handed to
    ``process_book``. When that returns rows, they are inserted into
    ``results`` (duplicates ignored) and the book is recorded in
    ``processed_books`` with ``max_phrase_length``; both writes are
    committed together per book.

    Args:
        start_book: First book id to process.
        end_book: Last book id to process (inclusive).
        max_phrase_length: Longest phrase, in words, to generate.
    """
    global book_names, total_word_count
    logging.info(f"Populating database with books from {start_book} to {end_book}...")

    # Word positions are counted from the first book of this pass, so start
    # from zero even if an earlier pass aborted half-way.
    total_word_count = 0
    try:
        # sqlite3's own context manager only commits or rolls back; closing()
        # is what actually releases the connection.
        with closing(sqlite3.connect(DATABASE_FILE)) as connection:
            cursor = connection.cursor()

            for book_id in tqdm(range(start_book, end_book + 1), desc="Processing Books"):
                phrases_to_insert = process_book(book_id, max_phrase_length, cursor)
                if not phrases_to_insert:
                    continue

                # Commit the rows and the bookkeeping entry atomically; an
                # error rolls both back so the book is retried next time.
                with connection:
                    cursor.executemany('''
                    INSERT OR IGNORE INTO results (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    ''', phrases_to_insert)

                    # Update processed_books after processing each book
                    cursor.execute('''
                    INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                    VALUES (?, ?)
                    ''', (book_names[book_id], max_phrase_length))
    finally:
        total_word_count = 0  # Reset for the next set of phrase lengths


def get_translation(phrase: str) -> str:
    """Return the English translation of a Hebrew phrase, caching the result.

    Lookup order: the in-memory ``translation_cache``, then a non-empty
    ``translation`` stored in the ``results`` table, then a fresh call to
    ``translate_and_store``. A successful fresh translation is written to
    every ``results`` row with the same ``words`` and to the in-memory cache.

    Args:
        phrase: The phrase exactly as stored in ``results.words``.

    Returns:
        The translation, or the failure placeholder produced by
        ``translate_and_store`` when translation did not succeed.
    """
    # Must stay in sync with the value translate_and_store() returns on failure.
    failure_placeholder = "[Translation Error]"

    cached = translation_cache.get(phrase)
    if cached:
        return cached

    # closing() releases the connection; sqlite3's own context manager would not.
    with closing(sqlite3.connect(DATABASE_FILE)) as connection:
        # Rows inserted after an earlier UPDATE still carry NULL, so ask
        # explicitly for a row that already has a translation.
        row = connection.execute(
            "SELECT translation FROM results "
            "WHERE words = ? AND translation IS NOT NULL AND translation != '' LIMIT 1",
            (phrase,),
        ).fetchone()
    if row and row[0]:
        translation_cache[phrase] = row[0]
        return row[0]

    translation = translate_and_store(phrase)
    if not translation or translation == failure_placeholder:
        # Do not persist a failure: the next request should retry instead of
        # serving the placeholder forever.
        return translation

    translation_cache[phrase] = translation
    with closing(sqlite3.connect(DATABASE_FILE)) as connection:
        with connection:  # commits on success, rolls back on error
            connection.execute("UPDATE results SET translation = ? WHERE words = ?", (translation, phrase))

    return translation


def translate_and_store(phrase: str) -> str:
    """Translate a phrase with the global ``translator``, retrying on failure.

    Up to three attempts are made. Each failed attempt caused by one of the
    listed deep_translator exceptions is logged as a warning. Note that,
    despite its name, this function does not persist anything itself.

    Args:
        phrase: Text to translate.

    Returns:
        The translator's result, or ``"[Translation Error]"`` once every
        attempt has failed.
    """
    global translator
    max_retries = 3
    for attempt in range(1, max_retries + 1):
        try:
            return translator.translate(phrase)
        except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
                exceptions.ServerException, exceptions.RequestError) as e:
            logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({attempt}/{max_retries})")
    logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
    return "[Translation Error]"


def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str, str, int, int, int, str]]:
    """Searches the database for phrases with a given Gematria value.

    Args:
        gematria_sum: Value to match against ``results.gematria_sum``.
        max_words: Upper bound (inclusive) on ``results.phrase_length``.

    Returns:
        A list of ``(words, book, chapter, verse, phrase_length,
        word_position)`` rows; empty when nothing matches.
    """
    # A private, explicitly closed connection: sqlite3's own context manager
    # does not close the connection, and rebinding the global `conn` here
    # would replace the handle created by initialize_database().
    with closing(sqlite3.connect(DATABASE_FILE)) as connection:
        return connection.execute('''
        SELECT words, book, chapter, verse, phrase_length, word_position
        FROM results
        WHERE gematria_sum = ? AND phrase_length <= ?
        ''', (gematria_sum, max_words)).fetchall()


def gematria_search_interface(phrase: str, max_words: int, show_translation: bool) -> str:
    """The main function for the Gradio interface.

    Computes the gematria value of ``phrase`` (runs of digits are added as
    plain numbers, the remaining text goes through ``calculate_gematria``),
    looks up every stored phrase with that value and at most ``max_words``
    words, and renders the matches as an HTML fragment.

    Args:
        phrase: User input; may mix letters and digits.
        max_words: Maximum phrase length, in words, of the results.
        show_translation: Whether to fetch and show an English translation
            for each match.

    Returns:
        An HTML string (inline ``<style>`` followed by one card per match),
        or a plain message when the input is empty or nothing matches.
    """
    if not phrase or not phrase.strip():
        return "Please enter a phrase."

    # A slider may hand over a float; normalise so the cache key and the SQL
    # parameter are always the same integer.
    max_words = int(max_words)

    numbers = re.findall(r'\d+', phrase)
    text_without_numbers = re.sub(r'\d+', '', phrase)
    phrase_gematria = calculate_gematria(text_without_numbers.replace(" ", ""))
    phrase_gematria += sum(int(number) for number in numbers)

    cache_key = (phrase_gematria, max_words)
    matching_phrases = gematria_cache.get(cache_key)
    if matching_phrases is None:
        matching_phrases = search_gematria_in_db(phrase_gematria, max_words)
        gematria_cache[cache_key] = matching_phrases

    if not matching_phrases:
        return "No matching phrases found."

    # Build the title -> id lookup once instead of scanning book_names for
    # every row. setdefault keeps the first id registered for a title.
    book_order = {}
    for known_id, known_title in book_names.items():
        book_order.setdefault(known_title, int(known_id))
    unknown_book_rank = float('inf')  # titles missing from book_names sort last instead of raising
    sorted_phrases = sorted(
        matching_phrases,
        key=lambda row: (book_order.get(row[1], unknown_book_rank), row[2], row[3]))

    results_by_book = defaultdict(list)
    for words, book, chapter, verse, phrase_length, word_position in sorted_phrases:
        results_by_book[book].append((words, chapter, verse, phrase_length, word_position))

    results = ["<div class='results-container'>"]
    for book, phrases in results_by_book.items():
        safe_book = html.escape(book)
        for words, chapter, verse, phrase_length, word_position in phrases:
            translation = get_translation(words) if show_translation else ""
            # Text from the database and from the translation service is
            # escaped before being placed into markup.
            safe_words = html.escape(words)
            safe_translation = html.escape(translation or "")
            link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
            results.append(f"""
            <div class='result-item'>
              <p><b>Book:</b> {safe_book}</p>
              <p><b>Chapter:</b> {chapter}, <b>Verse:</b> {verse}</p>
              <p class='hebrew-phrase'><b>Hebrew Phrase:</b> {safe_words}</p>
              <p><b>Translation:</b> {safe_translation}</p>
              <p><b>Phrase Length:</b> {phrase_length} words</p>
              <p><b>Phrase Gematria:</b> {phrase_gematria}</p>
              <p><b>Word Position in the Tanach:</b> {word_position}</p> 
              <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
            </div>
            """)
    results.append("</div>")

    # Style modified to position search on top and results below
    style = """
    <style>
      .results-container {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
        gap: 20px;
        width: 100%;  /* Make results container take full width */
      }
      .result-item {
        border: 1px solid #ccc;
        padding: 15px;
        border-radius: 5px;
        box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
      }
      .hebrew-phrase {
        font-family: 'SBL Hebrew', 'Ezra SIL', serif;
        direction: rtl;
      }
      .bible-link {
        display: block;
        margin-top: 10px;
        color: #007bff;
        text-decoration: none;
      }
    </style>
    """
    return style + "\n".join(results)


def flatten_text(text: List) -> str:
    """Collapse an arbitrarily nested list of strings into one string.

    Leaves are joined with single spaces in depth-first order. A value that
    is not a list is returned unchanged.
    """
    if not isinstance(text, list):
        return text
    # Non-list items come back from the recursive call untouched, so every
    # item can be routed through flatten_text uniformly.
    return " ".join(map(flatten_text, text))


def run_app() -> None:
    """Initializes and launches the Gradio app.

    Steps, in order:
      1. Create the database schema and the translator.
      2. Populate the database for books 1 through 39, once per phrase
         length from 1 up to MAX_PHRASE_LENGTH_LIMIT.
      3. Build the UI (text box, word-count slider, translation checkbox,
         Clear/Submit buttons, HTML output) and start serving it.
    """
    global conn
    initialize_database()
    initialize_translator()

    logging.info("Starting database population...")
    # One pass per phrase length; process_book() skips a book that is already
    # recorded in processed_books with this length or a higher one.
    for max_phrase_length in range(1, MAX_PHRASE_LENGTH_LIMIT + 1):
        populate_database(1, 39, max_phrase_length=max_phrase_length)
    logging.info("Database population complete.")

    with gr.Blocks() as iface:  # Use gr.Blocks() for layout control
        with gr.Row():  # Place inputs in a row
            textbox = gr.Textbox(label="Enter word(s) or numbers")
            slider = gr.Slider(label="Max Word Count in Result Phrases", minimum=1, maximum=MAX_PHRASE_LENGTH_LIMIT, step=1,
                              value=1)
            checkbox = gr.Checkbox(label="Show Translation", value=True)
        with gr.Row():  # Place buttons in a row
            clear_button = gr.Button("Clear")
            submit_button = gr.Button("Submit", variant="primary")

        html_output = gr.HTML(label="Results")  # Output for the results

        # Submit passes (text, slider value, checkbox) to the search function
        # and shows the HTML it returns.
        submit_button.click(fn=gematria_search_interface,
                            inputs=[textbox, slider, checkbox],
                            outputs=html_output)
        clear_button.click(fn=lambda: "", inputs=None, outputs=html_output)  # Clear the output

    iface.launch()


# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_app()