File size: 7,715 Bytes
9be760a
 
 
 
 
 
 
7445a27
9be760a
 
 
 
f16f0ac
9be760a
 
 
 
 
 
 
 
 
 
 
c160986
9be760a
 
 
7445a27
 
9be760a
 
 
 
 
c160986
7445a27
c160986
7445a27
c160986
 
 
7445a27
c160986
9be760a
 
 
 
 
c160986
 
9be760a
 
 
 
 
c160986
9be760a
c160986
 
 
 
 
 
 
 
 
 
 
9be760a
 
 
c160986
9be760a
c160986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9be760a
 
 
 
 
 
 
 
 
f16f0ac
9be760a
 
f16f0ac
9be760a
c160986
 
 
9be760a
 
c160986
f16f0ac
9be760a
28c065d
 
 
f16f0ac
c160986
 
 
7445a27
c160986
 
 
 
 
 
7445a27
9be760a
c160986
 
 
 
 
 
 
 
9be760a
 
26e9493
 
9be760a
 
 
 
 
f16f0ac
26e9493
 
 
 
 
 
 
 
c160986
26e9493
 
9be760a
c160986
26e9493
 
 
c160986
9be760a
 
7445a27
 
f16f0ac
 
 
c160986
9be760a
 
 
c160986
f16f0ac
9be760a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import gradio as gr
import json
import re
import sqlite3
import logging
from util import process_json_files
from gematria import calculate_gematria
from deep_translator import GoogleTranslator, exceptions

# Root logger: INFO and above, printing only the message text.
logging.basicConfig(
    format='%(message)s',
    level=logging.INFO,
)

def flatten_text(text):
    """Collapse a (possibly nested) list of strings into one space-joined string.

    A value that is not a list is returned unchanged; that is also what ends
    the recursion at the leaves.
    """
    if not isinstance(text, list):
        return text
    # Non-list items come back untouched from the recursive call, so every
    # element can be routed through flatten_text uniformly.
    return " ".join(map(flatten_text, text))

def initialize_database():
    """Create the ``results`` table in ``gematria.db`` if it does not exist.

    The table holds one row per indexed phrase: its gematria sum, the phrase
    itself (``words``), an optional translation, and its location (book
    number, title, chapter, verse). The UNIQUE constraint over everything
    except ``translation`` makes a repeated insert of the same phrase at the
    same location fail with ``sqlite3.IntegrityError``.

    Safe to call repeatedly; an existing table is left untouched.
    """
    conn = sqlite3.connect('gematria.db')
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book INTEGER,
                title TEXT,
                chapter INTEGER,
                verse INTEGER,
                UNIQUE(gematria_sum, words, book, title, chapter, verse)
            )
        ''')
        conn.commit()
    finally:
        # Release the file handle even when the DDL or commit fails.
        conn.close()

def insert_phrase_to_db(c, gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id):
    """Insert one phrase row into ``results`` through cursor *c*.

    A row rejected with ``sqlite3.IntegrityError`` (for example a duplicate
    under the table's UNIQUE constraint) is not treated as a failure: it is
    logged as already existing and the function returns normally. Nothing is
    committed here; committing is left to whoever owns the connection.
    """
    row = (gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id)
    logging.info(f"Inserting: {gematria_sum}, {phrase_candidate}, {translation}, {book_id}, {title}, {chapter_id}, {verse_id}")
    try:
        c.execute('''
            INSERT INTO results (gematria_sum, words, translation, book, title, chapter, verse)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        ''', row)
    except sqlite3.IntegrityError:
        logging.info(f"Entry already exists: {gematria_sum}, {phrase_candidate}, {book_id}, {title}, {chapter_id}, {verse_id}")

def populate_database(tanach_texts, max_phrase_length=1):
    """Index every phrase of up to *max_phrase_length* words into ``gematria.db``.

    Args:
        tanach_texts: Iterable of book dicts. Each must have a ``'text'`` key
            holding a list of chapters, where each chapter is a list of
            verses (a verse may be a string or a nested list of strings).
            An optional ``'title'`` key names the book; it defaults to
            ``'Unknown'``.
        max_phrase_length: Largest number of consecutive words (within one
            verse) to store as a single phrase.

    Books, chapters and verses with an unexpected shape are skipped with a
    warning. Book, chapter and verse numbers are stored 1-based. Rows are
    inserted without a translation. All inserts are committed once at the
    end; the connection is closed even if indexing fails part-way.
    """
    # Everything except the Hebrew letters U+05D0..U+05EA and spaces is dropped.
    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")
    conn = sqlite3.connect('gematria.db')
    try:
        c = conn.cursor()
        for book_id, text in enumerate(tanach_texts):
            if 'text' not in text or not isinstance(text['text'], list):
                logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
                continue
            title = text.get('title', 'Unknown')
            for chapter_id, chapter in enumerate(text['text']):
                if not isinstance(chapter, list):
                    logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
                    continue
                for verse_id, verse in enumerate(chapter):
                    verse_text = flatten_text(verse)
                    if not isinstance(verse_text, str):
                        # flatten_text passes non-list values through as-is,
                        # so e.g. a None verse would otherwise crash the regex.
                        logging.warning(f"Skipping verse {verse_id} in chapter {chapter_id} of book {title} due to invalid format.")
                        continue
                    # split() already collapses runs of spaces, so no separate
                    # whitespace normalisation is needed.
                    words = non_hebrew.sub("", verse_text).split()
                    max_length = min(max_phrase_length, len(words))
                    for length in range(1, max_length + 1):
                        # Slide a window of `length` words across the verse.
                        for start in range(len(words) - length + 1):
                            window = words[start:start + length]
                            phrase_candidate = " ".join(window)
                            gematria_sum = calculate_gematria("".join(window))
                            # No translation initially; it is filled in lazily on search.
                            insert_phrase_to_db(c, gematria_sum, phrase_candidate, None,
                                                book_id + 1, title, chapter_id + 1, verse_id + 1)
        conn.commit()
    finally:
        conn.close()

def get_translation_from_db(c, phrase, book, chapter, verse):
    """Look up the stored translation of *phrase* at the given location.

    Runs the query on cursor *c* and returns the ``translation`` column of
    the first matching row. Returns ``None`` both when no row matches and
    when the matching row has no translation stored.
    """
    location = (phrase, book, chapter, verse)
    c.execute('''
        SELECT translation FROM results 
        WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
    ''', location)
    row = c.fetchone()
    if not row:
        return None
    return row[0]

def translate_and_store(conn, phrase, book, chapter, verse):
    """Translate *phrase* from Hebrew to English and cache the result.

    Args:
        conn: Open sqlite3 connection to the database holding ``results``.
        phrase: Hebrew text to translate; also used to locate the row.
        book, chapter, verse: Location used, together with *phrase*, to
            select the row(s) whose ``translation`` column is updated.

    Returns:
        The translated text, or the literal ``"[Translation Error]"`` when
        the translation service fails. A failed translation is logged and
        not written to the database, so it will be retried on a later call.
    """
    translator = GoogleTranslator(source='iw', target='en')  # Explicitly set source to Hebrew
    try:
        # Only the remote call is guarded; database errors must still surface.
        translation = translator.translate(phrase)
    except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
            exceptions.ServerException, exceptions.RequestError,
            exceptions.TooManyRequests) as e:
        # TooManyRequests matters here: a search translates many phrases
        # back-to-back, and rate limiting must not abort the whole search.
        logging.error(f"Error translating phrase '{phrase}': {e}")
        return "[Translation Error]"

    logging.info(f"Translated phrase: {translation}")
    c = conn.cursor()
    c.execute('''
        UPDATE results
        SET translation = ?
        WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
    ''', (translation, phrase, book, chapter, verse))
    conn.commit()
    return translation

def gematria_search_interface(phrase):
    """Find stored phrases whose gematria equals that of *phrase*.

    This is the callback wired into the Gradio interface.

    Args:
        phrase: Text entered by the user. Spaces are removed before the
            gematria value is computed. ``None`` or a blank string yields
            a prompt to enter a phrase.

    Returns:
        A ``(result_text, debug_text)`` tuple. ``result_text`` lists every
        match ordered by book, chapter and verse; matches without a stored
        translation are translated (and cached) on the fly. ``debug_text``
        is the newline-joined debug messages produced during the call.
    """
    debug_output = []

    def debug_callback(message):
        debug_output.append(message)
        logging.info(message)

    if not phrase or not phrase.strip():
        return "Please enter a phrase.", "\n".join(debug_output)

    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
    debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")

    conn = sqlite3.connect('gematria.db')
    try:
        c = conn.cursor()
        matching_phrases = search_gematria_in_db(c, phrase_gematria)

        if not matching_phrases:
            return "No matching phrases found.", "\n".join(debug_output)

        # Sort matching phrases by book, chapter, and verse
        matching_phrases.sort(key=lambda x: (x[1], x[3], x[4]))

        # Collect the pieces and join once instead of growing a string.
        parts = ["Matching phrases:\n"]
        for match in matching_phrases:
            if len(match) != 6:  # words, book, title, chapter, verse, translation
                debug_callback(f"Error: Expected tuple of length 6, but got {len(match)}: {match}")
                continue
            words, book, title, chapter, verse, translation = match
            if not translation:  # Not translated yet: translate now and cache it
                translation = translate_and_store(conn, words, book, chapter, verse)
            parts.append(f"Book: {title} ({book})\nChapter: {chapter}, Verse: {verse}\nPhrase: {words}\nTranslation: {translation}\n\n")

        return "".join(parts), "\n".join(debug_output)
    finally:
        # Close on every exit path, including a failure inside translation.
        conn.close()

def search_gematria_in_db(c, gematria_sum):
    """Return every row of ``results`` whose gematria equals *gematria_sum*.

    Each row is a ``(words, book, title, chapter, verse, translation)``
    tuple, fetched through cursor *c*. The full row list is also written to
    the log at INFO level. An empty list means no phrase matched.
    """
    query = '''
        SELECT words, book, title, chapter, verse, translation FROM results WHERE gematria_sum = ?
    '''
    rows = c.execute(query, (gematria_sum,)).fetchall()
    logging.info(f"Search results: {rows}")
    return rows

def run_test():
    """Load the source texts and index their single words into the database.

    Despite the name, this performs no assertions: it fetches texts with
    ``process_json_files(1, 39)`` and passes them to ``populate_database``.
    NOTE(review): the arguments presumably select books 1 through 39;
    confirm against ``util.process_json_files``.
    """
    tanach_texts = process_json_files(1, 39)
    # max_phrase_length=1 stores single words only. Pass a larger value
    # (e.g. 2 or 3) to also index multi-word phrases within each verse.
    populate_database(tanach_texts, max_phrase_length=1)

# Gradio front end: one text input, two text outputs (results and debug log),
# all served by gematria_search_interface.
iface = gr.Interface(
    fn=gematria_search_interface,
    title="Gematria Search in Tanach",
    description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
    inputs=gr.Textbox(label="Enter phrase"),
    outputs=[
        gr.Textbox(label="Results"),
        gr.Textbox(label="Debug Output"),
    ],
    live=False,  # Disable live update
    allow_flagging="never",  # Disable flagging
)

if __name__ == "__main__":
    # Order matters: the table must exist before run_test() fills it, and it
    # is filled before the UI starts answering searches.
    initialize_database()
    run_test()
    iface.launch()