# english app — neuralworm (Hugging Face Space), revision f16f0ac, 6.25 kB
# (page-scrape metadata preserved as a comment so the module parses)
import gradio as gr
import json
import re
import sqlite3
import logging
# NOTE(review): `json` is not referenced in this file — confirm before removing.

# Project-local helpers: corpus loading and gematria arithmetic.
from util import process_json_files
from gematria import calculate_gematria
# Third-party wrapper around Google Translate, used to translate results.
from deep_translator import GoogleTranslator

# Bare-message format keeps log lines clean for the debug output box.
logging.basicConfig(level=logging.INFO, format='%(message)s')
def flatten_text(text):
    """Collapse arbitrarily nested lists of strings into one space-joined string.

    Non-list input is returned unchanged.
    """
    if not isinstance(text, list):
        return text
    pieces = []
    for element in text:
        if isinstance(element, list):
            pieces.append(flatten_text(element))
        else:
            pieces.append(element)
    return " ".join(pieces)
def initialize_database():
    """Create the SQLite `results` table (and its search index) if missing.

    One row per candidate phrase: its gematria value plus its location in
    the corpus (book id, book title, chapter, verse/word offset).
    """
    conn = sqlite3.connect('gematria.db')
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                book INTEGER,
                title TEXT,
                chapter INTEGER,
                verse INTEGER
            )
        ''')
        # The only query pattern is an equality lookup on gematria_sum
        # (see search_gematria_in_db); an index avoids a full table scan.
        conn.execute(
            'CREATE INDEX IF NOT EXISTS idx_results_gematria_sum '
            'ON results (gematria_sum)'
        )
        conn.commit()
    finally:
        # try/finally guarantees the handle is released even if DDL fails
        # (the original leaked the connection on error).
        conn.close()
def insert_phrase_to_db(c, gematria_sum, phrase_candidate, book_id, title, chapter_id, verse_id):
    """Log and insert one phrase row via cursor *c*.

    chapter_id and verse_id are 0-based; they are stored 1-based.
    The caller owns the transaction (no commit here).
    """
    chapter_number = chapter_id + 1
    verse_number = verse_id + 1
    logging.info(f"Inserting: {gematria_sum}, {phrase_candidate}, {book_id}, {title}, {chapter_number}, {verse_number}")
    row = (gematria_sum, phrase_candidate, book_id, title, chapter_number, verse_number)
    c.execute('''
    INSERT INTO results (gematria_sum, words, book, title, chapter, verse)
    VALUES (?, ?, ?, ?, ?, ?)
    ''', row)
def populate_database(tanach_texts, max_phrase_length=1):
    """Insert every word n-gram (1 <= n <= max_phrase_length) of every chapter.

    Each entry of *tanach_texts* is expected to be a dict with a 'title'
    and a 'text' list of chapters, each chapter a list of verses (possibly
    nested lists). Chapter text is flattened, stripped to Hebrew letters,
    and split into words. The stored 'verse' column is the 1-based word
    offset of the phrase inside the chapter, not a scriptural verse number.

    Entries whose 'text' is not a list, and chapters that are not lists,
    are skipped silently (malformed corpus data).
    """
    conn = sqlite3.connect('gematria.db')
    try:
        c = conn.cursor()
        for book_id, text in enumerate(tanach_texts):
            chapters = text.get('text')
            if not isinstance(chapters, list):
                continue
            title = text.get('title', 'Unknown')
            for chapter_id, chapter in enumerate(chapters):
                if not isinstance(chapter, list):
                    continue
                chapter_text = ' '.join(flatten_text(verse) for verse in chapter)
                # Keep only Hebrew letters and spaces, then squeeze runs of spaces.
                chapter_text = re.sub(r"[^\u05D0-\u05EA ]+", "", chapter_text)
                chapter_text = re.sub(r" +", " ", chapter_text)
                words = chapter_text.split()
                max_length = min(max_phrase_length, len(words))
                for length in range(1, max_length + 1):
                    for start in range(len(words) - length + 1):
                        phrase_candidate = " ".join(words[start:start + length])
                        gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
                        # BUG FIX: pass the 0-based offset; insert_phrase_to_db
                        # already adds 1. The original passed start + 1, storing
                        # the position off by one (double increment).
                        insert_phrase_to_db(c, gematria_sum, phrase_candidate,
                                            book_id, title, chapter_id, start)
        conn.commit()
    finally:
        # Ensure the connection is released even if an insert fails.
        conn.close()
def search_gematria_in_db(gematria_sum):
    """Return all (words, book, title, chapter, verse) rows whose stored
    gematria value equals *gematria_sum*."""
    connection = sqlite3.connect('gematria.db')
    cursor = connection.cursor()
    query = '''
    SELECT words, book, title, chapter, verse FROM results WHERE gematria_sum = ?
    '''
    cursor.execute(query, (gematria_sum,))
    rows = cursor.fetchall()
    connection.close()
    return rows
def translate_phrases(phrases):
    """Translate each phrase to English (source language auto-detected).

    One translator instance is reused across the whole batch.
    """
    translator = GoogleTranslator(source='auto', target='en')
    return [translator.translate(phrase) for phrase in phrases]
def db(tanach_texts, max_phrase_length=1):
    """Ensure the schema exists, then fill it from *tanach_texts*."""
    initialize_database()
    populate_database(tanach_texts, max_phrase_length)
    logging.info("Database successfully created and populated.")
def gematria_search_interface(phrase):
    """Gradio handler: find corpus phrases sharing the query's gematria value.

    Returns a (results_text, debug_text) tuple for the two output boxes.
    """
    debug_output = []

    def debug_callback(message):
        # Collect messages for the UI debug box and mirror them to the log.
        debug_output.append(message)
        logging.info(message)

    if not phrase.strip():
        return "Please enter a phrase.", "\n".join(debug_output)
    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
    debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")
    matching_phrases = search_gematria_in_db(phrase_gematria)
    if not matching_phrases:
        return "No matching phrases found.", "\n".join(debug_output)
    phrases = [match[0] for match in matching_phrases]
    translations = translate_phrases(phrases)
    result = "Matching phrases:\n"
    # Rows are (words, book, title, chapter, verse) — see search_gematria_in_db.
    # BUG FIX: the original indexed match[2..5], but rows have only 5 fields
    # (match[5] raised IndexError); indices are realigned to the SELECT order.
    for match, translation in zip(matching_phrases, translations):
        result += (
            f"Book: {match[2]} ({match[1]})\n"
            f"Chapter: {match[3]}, Verse: {match[4]}\n"
            f"Phrase: {match[0]}\nTranslation: {translation}\n\n"
        )
    # BUG FIX: the original returned "\n".join(debug_callback) — joining the
    # function object itself — which raised TypeError on every successful search.
    return result, "\n".join(debug_output)
def run_test():
    """Smoke-test database population and search against 00.json and 01.json.

    NOTE(review): db() appends to the existing gematria.db, so the second
    search also sees rows inserted by the first run; the assertions only
    inspect the first match, which may come from either dataset — consider
    clearing the results table between runs.
    """
    # --- 00.json: the full-alphabet phrase must be found by its known gematria.
    test_phrase = "ืื‘ื’ื“ื”ื•ื–ื—ื˜ื™ื›ืœืžื ืกืขืคืฆืงืจืฉืช"
    expected_gematria = 1495
    test_texts_00 = process_json_files(0, 0)
    db(test_texts_00, max_phrase_length=22)  # phrases up to 22 words long
    matching_phrases_00 = search_gematria_in_db(expected_gematria)
    assert matching_phrases_00, "No matching phrases found in 00.json."
    assert matching_phrases_00[0][0].replace(" ", "") == test_phrase, \
        f"Found phrase does not match: {matching_phrases_00[0][0]}"
    print("Test successful: The phrase was correctly found and the gematria matches in 00.json.")

    # --- 01.json: a two-word phrase must be found by its computed gematria.
    test_texts_01 = process_json_files(1, 1)
    db(test_texts_01, max_phrase_length=2)  # phrases up to 2 words long
    search_phrase_01 = "ืืชืงืœืš ืฉืžืขืชื™"
    expected_gematria_01 = calculate_gematria(search_phrase_01.replace(" ", ""))
    matching_phrases_01 = search_gematria_in_db(expected_gematria_01)
    assert matching_phrases_01, "No matching phrases found in 01.json."
    assert matching_phrases_01[0][0].replace(" ", "") == search_phrase_01.replace(" ", ""), \
        f"Found phrase does not match: {matching_phrases_01[0][0]}"
    print("Test successful: The phrase was correctly found and the gematria matches in 01.json.")
    # Removed: an inner debug_callback and debug_output list that were never
    # invoked (the final print always emitted an empty string).
# Gradio UI: one text input, two text outputs (results + debug log).
# NOTE(review): gr.inputs / gr.outputs and allow_flagging=False are the
# pre-3.x Gradio API; newer Gradio exposes gr.Textbox directly and uses
# allow_flagging="never" — confirm the pinned gradio version before upgrading.
iface = gr.Interface(
    fn=gematria_search_interface,
    inputs=gr.inputs.Textbox(label="Enter phrase"),
    outputs=[gr.outputs.Textbox(label="Results"), gr.outputs.Textbox(label="Debug Output")],
    title="Gematria Search in Tanach",
    description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
    live=False,  # Disable live update
    allow_flagging=False,  # Disable flagging for simplicity
)

if __name__ == "__main__":
    # Populates gematria.db as a side effect before serving the UI.
    run_test()  # Run tests
    iface.launch()