Spaces:
Running
Running
File size: 7,715 Bytes
9be760a 7445a27 9be760a f16f0ac 9be760a c160986 9be760a 7445a27 9be760a c160986 7445a27 c160986 7445a27 c160986 7445a27 c160986 9be760a c160986 9be760a c160986 9be760a c160986 9be760a c160986 9be760a c160986 9be760a f16f0ac 9be760a f16f0ac 9be760a c160986 9be760a c160986 f16f0ac 9be760a 28c065d f16f0ac c160986 7445a27 c160986 7445a27 9be760a c160986 9be760a 26e9493 9be760a f16f0ac 26e9493 c160986 26e9493 9be760a c160986 26e9493 c160986 9be760a 7445a27 f16f0ac c160986 9be760a c160986 f16f0ac 9be760a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
import gradio as gr
import json
import re
import sqlite3
import logging
from util import process_json_files
from gematria import calculate_gematria
from deep_translator import GoogleTranslator, exceptions
# Configure the root logger once at import time: INFO level and a bare
# message format (no timestamp or level prefix).
logging.basicConfig(level=logging.INFO, format='%(message)s')
def flatten_text(text):
    """Flatten arbitrarily nested lists of text into one space-joined string.

    Args:
        text: A string, or a (possibly nested) list whose leaves are strings.

    Returns:
        For a list: a single string with every leaf joined by one space
        (an empty list yields ""). Any non-list value is returned unchanged.
    """
    if not isinstance(text, list):
        return text

    parts = []
    for item in text:
        if item is None:
            # A missing leaf carries no text; skip it instead of crashing join.
            continue
        flat = flatten_text(item)
        # str.join only accepts strings, so coerce stray non-string leaves.
        parts.append(flat if isinstance(flat, str) else str(flat))
    return " ".join(parts)
def initialize_database(db_path='gematria.db'):
    """Create the ``results`` table if it does not already exist.

    Each row stores one phrase occurrence: its gematria sum, the words, an
    optional translation, and its location (book, title, chapter, verse).
    The UNIQUE constraint makes re-inserting the same occurrence fail with
    ``sqlite3.IntegrityError`` instead of creating a duplicate.

    Args:
        db_path: Path of the SQLite database file. Defaults to the file the
            rest of this module uses.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book INTEGER,
                title TEXT,
                chapter INTEGER,
                verse INTEGER,
                UNIQUE(gematria_sum, words, book, title, chapter, verse)
            )
        ''')
        conn.commit()
    finally:
        # Always release the file handle, even if the DDL fails.
        conn.close()
def insert_phrase_to_db(c, gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id):
    """Insert one phrase occurrence into ``results``, ignoring duplicates.

    Args:
        c: An open ``sqlite3`` cursor; the caller owns commit/close.
        gematria_sum: Gematria value of the phrase.
        phrase_candidate: The phrase text (words separated by spaces).
        translation: Translation text, or None if not translated yet.
        book_id: Book number stored in the ``book`` column.
        title: Book title.
        chapter_id: Chapter number.
        verse_id: Verse number.

    A row that violates the table's UNIQUE constraint is logged and skipped
    rather than raised.
    """
    # Lazy %-style arguments: this runs once per phrase, so the message is
    # only formatted when the record is actually emitted.
    logging.info(
        "Inserting: %s, %s, %s, %s, %s, %s, %s",
        gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id,
    )
    try:
        c.execute('''
            INSERT INTO results (gematria_sum, words, translation, book, title, chapter, verse)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        ''', (gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id))
    except sqlite3.IntegrityError:
        logging.info(
            "Entry already exists: %s, %s, %s, %s, %s, %s",
            gematria_sum, phrase_candidate, book_id, title, chapter_id, verse_id,
        )
def populate_database(tanach_texts, max_phrase_length=1, db_path='gematria.db'):
    """Insert every phrase of up to ``max_phrase_length`` words into the DB.

    For each verse, everything except the Hebrew letters U+05D0..U+05EA and
    spaces is removed, the verse is split into words, and every contiguous
    run of 1..max_phrase_length words is stored with its gematria sum.
    Book, chapter and verse numbers are stored 1-based. Rows are inserted
    without a translation.

    Args:
        tanach_texts: Iterable of book dicts, each with a ``'text'`` list of
            chapters (each a list of verses) and an optional ``'title'``.
            Malformed books/chapters are logged and skipped.
        max_phrase_length: Maximum number of words per stored phrase.
        db_path: Path of the SQLite database file.
    """
    # Compiled once instead of being looked up for every verse.
    # NOTE(review): matches are deleted, not replaced by a space, so words
    # joined only by punctuation (e.g. a maqaf) merge into a single token —
    # confirm this is intended.
    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")

    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        for book_id, text in enumerate(tanach_texts):
            if (not isinstance(text, dict) or 'text' not in text
                    or not isinstance(text['text'], list)):
                logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
                continue
            title = text.get('title', 'Unknown')
            for chapter_id, chapter in enumerate(text['text']):
                if not isinstance(chapter, list):
                    logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
                    continue
                for verse_id, verse in enumerate(chapter):
                    verse_text = flatten_text(verse)
                    if not isinstance(verse_text, str):
                        # Nothing textual to index (e.g. a null verse).
                        continue
                    # split() already collapses runs of spaces.
                    words = non_hebrew.sub("", verse_text).split()
                    max_length = min(max_phrase_length, len(words))
                    for length in range(1, max_length + 1):
                        for start in range(len(words) - length + 1):
                            window = words[start:start + length]
                            phrase_candidate = " ".join(window)
                            gematria_sum = calculate_gematria("".join(window))
                            # No translation initially; it is filled in lazily on search.
                            insert_phrase_to_db(c, gematria_sum, phrase_candidate, None,
                                                book_id + 1, title, chapter_id + 1, verse_id + 1)
        # Single commit at the end keeps the bulk insert in one transaction.
        conn.commit()
    finally:
        # Closing without commit discards a partial run if anything raised.
        conn.close()
def get_translation_from_db(c, phrase, book, chapter, verse):
    """Look up the stored translation for one phrase occurrence.

    Args:
        c: An open ``sqlite3`` cursor.
        phrase: The phrase text as stored in the ``words`` column.
        book: Book number.
        chapter: Chapter number.
        verse: Verse number.

    Returns:
        The ``translation`` value of the first matching row, or None when
        no row matches (the value itself may also be None/NULL).
    """
    lookup_key = (phrase, book, chapter, verse)
    c.execute('''
        SELECT translation FROM results
        WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
    ''', lookup_key)
    row = c.fetchone()
    if row is None:
        return None
    return row[0]
def translate_and_store(conn, phrase, book, chapter, verse):
    """Translate a Hebrew phrase to English and cache it in ``results``.

    Args:
        conn: An open ``sqlite3`` connection; the update is committed on it.
        phrase: The phrase text as stored in the ``words`` column.
        book: Book number of the occurrence.
        chapter: Chapter number of the occurrence.
        verse: Verse number of the occurrence.

    Returns:
        The translated text, or the placeholder ``"[Translation Error]"``
        when the translation service fails. Failures are logged and are not
        written to the database, so a later search will retry.
    """
    translator = GoogleTranslator(source='iw', target='en')  # Explicitly set source to Hebrew
    try:
        translation = translator.translate(phrase)
    except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
            exceptions.ServerException, exceptions.RequestError,
            # Raised on HTTP 429; likely when many matches are translated
            # back-to-back, and previously crashed the whole search.
            exceptions.TooManyRequests) as e:
        logging.error(f"Error translating phrase '{phrase}': {e}")
        return "[Translation Error]"

    logging.info(f"Translated phrase: {translation}")
    c = conn.cursor()
    c.execute('''
        UPDATE results
        SET translation = ?
        WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
    ''', (translation, phrase, book, chapter, verse))
    conn.commit()
    return translation
def gematria_search_interface(phrase):
    """Search the database for phrases with the same gematria as ``phrase``.

    Args:
        phrase: Text entered by the user; spaces are ignored when computing
            its gematria. None or blank input is rejected with a message.

    Returns:
        A ``(results_text, debug_text)`` tuple of strings. ``results_text``
        lists each match sorted by book, chapter and verse; matches without
        a stored translation are translated (and cached) on the fly.
    """
    debug_output = []

    def debug_callback(message):
        # Collect messages for the UI's debug box and mirror them to the log.
        debug_output.append(message)
        logging.info(message)

    if not phrase or not phrase.strip():
        return "Please enter a phrase.", "\n".join(debug_output)

    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
    debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")

    conn = sqlite3.connect('gematria.db')
    try:
        c = conn.cursor()
        matching_phrases = search_gematria_in_db(c, phrase_gematria)
        if not matching_phrases:
            return "No matching phrases found.", "\n".join(debug_output)

        # Sort matching phrases by book, chapter, and verse
        matching_phrases.sort(key=lambda x: (x[1], x[3], x[4]))

        # Collect pieces and join once instead of repeated string +=.
        parts = ["Matching phrases:\n"]
        for match in matching_phrases:
            if len(match) != 6:  # Adjusted length for added translation
                debug_callback(f"Error: Expected tuple of length 6, but got {len(match)}: {match}")
                continue
            words, book, title, chapter, verse, translation = match
            if not translation:  # Check if translation exists
                translation = translate_and_store(conn, words, book, chapter, verse)
            parts.append(
                f"Book: {title} ({book})\nChapter: {chapter}, Verse: {verse}\n"
                f"Phrase: {words}\nTranslation: {translation}\n\n"
            )
        return "".join(parts), "\n".join(debug_output)
    finally:
        # Close on every path, including when search or translation raises.
        conn.close()
def search_gematria_in_db(c, gematria_sum):
    """Fetch every stored phrase whose gematria equals ``gematria_sum``.

    Args:
        c: An open ``sqlite3`` cursor.
        gematria_sum: The gematria value to match.

    Returns:
        A list of ``(words, book, title, chapter, verse, translation)``
        tuples; empty when nothing matches.
    """
    params = (gematria_sum,)
    c.execute('''
        SELECT words, book, title, chapter, verse, translation FROM results WHERE gematria_sum = ?
    ''', params)
    results = c.fetchall()
    logging.info(f"Search results: {results}")
    return results
def run_test():
    """Populate the database with single-word phrases at startup.

    Despite its name, this function performs no assertions: it loads texts
    with ``process_json_files(1, 39)`` (presumably a book-number range —
    verify against ``util``) and stores every single word with its gematria.

    NOTE: an earlier, disabled self-test populated books 0 and 1 with longer
    phrases (up to 22 and 3 words respectively) and asserted that a known
    phrase with gematria 1495 was found. It was removed as dead code
    together with its unused debug helpers; raise ``max_phrase_length``
    below (e.g. to 2 or 3) to index multi-word phrases.
    """
    test_texts = process_json_files(1, 39)
    populate_database(test_texts, max_phrase_length=1)
# Gradio UI: one text input wired to gematria_search_interface, whose two
# returned strings fill the "Results" and "Debug Output" textboxes in order.
iface = gr.Interface(
    fn=gematria_search_interface,
    inputs=gr.Textbox(label="Enter phrase"),
    outputs=[gr.Textbox(label="Results"), gr.Textbox(label="Debug Output")],
    title="Gematria Search in Tanach",
    description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
    live=False, # Disable live update
    allow_flagging="never" # Disable flagging
)
# Script entry point: create the schema, populate the database, then serve
# the UI. Order matters — the table must exist before run_test() inserts.
if __name__ == "__main__":
    initialize_database()
    run_test() # Run tests
    iface.launch()
|