gematria_date_sums

Running

App Files Files Community

gematria_date_sums / app.py

neuralworm

fixes

26e9493 10 months ago

raw

history blame

7.72 kB

	import gradio as gr
	import json
	import re
	import sqlite3
	import logging
	from util import process_json_files
	from gematria import calculate_gematria
	from deep_translator import GoogleTranslator, exceptions

	logging.basicConfig(level=logging.INFO, format='%(message)s')

	def flatten_text(text):
	    """Collapse an arbitrarily nested list of strings into one space-joined string.

	    A value that is not a list is handed back unchanged.
	    """
	    if not isinstance(text, list):
	        return text
	    # Recursing on every element is safe: non-list items come back as-is.
	    return " ".join(map(flatten_text, text))

	def initialize_database(db_path='gematria.db'):
	    """Create the ``results`` table if it does not exist yet.

	    Args:
	        db_path: SQLite database file to open. Defaults to ``gematria.db``.

	    The table holds one row per phrase occurrence; the UNIQUE constraint
	    rejects a second row with the same sum, words and location. The
	    connection is closed even when table creation fails.
	    """
	    conn = sqlite3.connect(db_path)
	    try:
	        c = conn.cursor()
	        c.execute('''
	            CREATE TABLE IF NOT EXISTS results (
	                gematria_sum INTEGER,
	                words TEXT,
	                translation TEXT,
	                book INTEGER,
	                title TEXT,
	                chapter INTEGER,
	                verse INTEGER,
	                UNIQUE(gematria_sum, words, book, title, chapter, verse)
	            )
	        ''')
	        conn.commit()
	    finally:
	        # Release the file handle on the error path too.
	        conn.close()

	def insert_phrase_to_db(c, gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id):
	    """Insert one phrase row into ``results`` through cursor ``c``.

	    A row rejected with ``sqlite3.IntegrityError`` (for example a duplicate
	    under a UNIQUE constraint) is logged and skipped instead of raising.
	    Nothing is committed here; that is left to the caller.
	    """
	    logging.info(f"Inserting: {gematria_sum}, {phrase_candidate}, {translation}, {book_id}, {title}, {chapter_id}, {verse_id}")
	    row = (gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id)
	    try:
	        c.execute('''
	            INSERT INTO results (gematria_sum, words, translation, book, title, chapter, verse)
	            VALUES (?, ?, ?, ?, ?, ?, ?)
	        ''', row)
	    except sqlite3.IntegrityError:
	        logging.info(f"Entry already exists: {gematria_sum}, {phrase_candidate}, {book_id}, {title}, {chapter_id}, {verse_id}")

	def populate_database(tanach_texts, max_phrase_length=1, db_path='gematria.db'):
	    """Store every phrase of up to ``max_phrase_length`` words with its gematria sum.

	    Args:
	        tanach_texts: iterable of book dicts. Each needs a ``'text'`` list of
	            chapters (each a list of verses) and may carry a ``'title'``.
	        max_phrase_length: longest phrase, in words, generated per verse.
	        db_path: SQLite file that already contains the ``results`` table.

	    Books, chapters and verses with an unexpected shape are logged and
	    skipped. Book, chapter and verse numbers are stored 1-based, and rows are
	    inserted without a translation. Everything is committed once at the end;
	    the connection is closed even if an error interrupts the run.
	    """
	    # Keep only the Hebrew letters U+05D0..U+05EA and spaces; compiled once.
	    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")
	    conn = sqlite3.connect(db_path)
	    try:
	        c = conn.cursor()
	        for book_id, text in enumerate(tanach_texts):
	            if 'text' not in text or not isinstance(text['text'], list):
	                logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
	                continue
	            title = text.get('title', 'Unknown')
	            for chapter_id, chapter in enumerate(text['text']):
	                if not isinstance(chapter, list):
	                    logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
	                    continue
	                for verse_id, verse in enumerate(chapter):
	                    verse_text = flatten_text(verse)
	                    if not isinstance(verse_text, str):
	                        # Same skip-and-warn policy as for bad books/chapters,
	                        # instead of crashing inside the regex.
	                        logging.warning(f"Skipping verse {verse_id} in chapter {chapter_id} of book {title} due to invalid format.")
	                        continue
	                    # split() already swallows runs of spaces left by the filter.
	                    words = non_hebrew.sub("", verse_text).split()
	                    max_length = min(max_phrase_length, len(words))
	                    for length in range(1, max_length + 1):
	                        for start in range(len(words) - length + 1):
	                            phrase_words = words[start:start + length]
	                            gematria_sum = calculate_gematria("".join(phrase_words))
	                            # No translation initially; it is filled in lazily.
	                            insert_phrase_to_db(c, gematria_sum, " ".join(phrase_words), None,
	                                                book_id + 1, title, chapter_id + 1, verse_id + 1)
	        conn.commit()
	    finally:
	        conn.close()

	def get_translation_from_db(c, phrase, book, chapter, verse):
	    """Look up the stored translation of ``phrase`` at a book/chapter/verse.

	    Returns the ``translation`` column of the first matching row. The result
	    is ``None`` when no row matches or when the stored value is NULL.
	    """
	    lookup_key = (phrase, book, chapter, verse)
	    c.execute('''
	        SELECT translation FROM results
	        WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
	    ''', lookup_key)
	    row = c.fetchone()
	    if row is None:
	        return None
	    return row[0]

	def translate_and_store(conn, phrase, book, chapter, verse):
	    """Translate ``phrase`` to English and save it on the matching rows.

	    The translation is written to every ``results`` row with the same words,
	    book, chapter and verse, then committed on ``conn``. If the translator
	    raises one of the handled deep_translator errors, the error is logged,
	    nothing is stored, and the placeholder ``"[Translation Error]"`` is
	    returned instead.
	    """
	    # Source language is pinned to 'iw' (Hebrew) rather than auto-detected.
	    translator = GoogleTranslator(source='iw', target='en')
	    cursor = conn.cursor()
	    handled_errors = (
	        exceptions.TranslationNotFound,
	        exceptions.NotValidPayload,
	        exceptions.ServerException,
	        exceptions.RequestError,
	    )
	    try:
	        translation = translator.translate(phrase)
	    except handled_errors as err:
	        logging.error(f"Error translating phrase '{phrase}': {err}")
	        return "[Translation Error]"
	    logging.info(f"Translated phrase: {translation}")
	    cursor.execute('''
	        UPDATE results
	        SET translation = ?
	        WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
	    ''', (translation, phrase, book, chapter, verse))
	    conn.commit()
	    return translation

	def gematria_search_interface(phrase):
	    """Find stored phrases whose gematria equals that of ``phrase``.

	    Args:
	        phrase: text entered by the user; spaces are ignored for the sum.

	    Returns:
	        A ``(results_text, debug_text)`` pair. ``results_text`` lists every
	        match ordered by book, chapter and verse, or a short message when the
	        input is blank or nothing matches. Matches without a stored
	        translation are translated on the fly and the result is saved.
	    """
	    debug_output = []

	    def debug_callback(message):
	        # Collect the message for the debug box and mirror it to the log.
	        debug_output.append(message)
	        logging.info(message)

	    # Treat None like a blank string instead of failing on .strip().
	    if not phrase or not phrase.strip():
	        return "Please enter a phrase.", "\n".join(debug_output)

	    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
	    debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")

	    conn = sqlite3.connect('gematria.db')
	    try:
	        c = conn.cursor()
	        matching_phrases = search_gematria_in_db(c, phrase_gematria)
	        if not matching_phrases:
	            return "No matching phrases found.", "\n".join(debug_output)

	        # Rows are (words, book, title, chapter, verse, translation);
	        # order them by book, chapter and verse.
	        matching_phrases.sort(key=lambda x: (x[1], x[3], x[4]))

	        parts = ["Matching phrases:\n"]
	        for match in matching_phrases:
	            if len(match) != 6:
	                debug_callback(f"Error: Expected tuple of length 6, but got {len(match)}: {match}")
	                continue
	            words, book, title, chapter, verse, translation = match
	            if not translation:
	                # Translate lazily and cache the result in the database.
	                translation = translate_and_store(conn, words, book, chapter, verse)
	            parts.append(f"Book: {title} ({book})\nChapter: {chapter}, Verse: {verse}\nPhrase: {words}\nTranslation: {translation}\n\n")
	        return "".join(parts), "\n".join(debug_output)
	    finally:
	        # Close on every exit path, including errors raised while searching
	        # or translating.
	        conn.close()

	def search_gematria_in_db(c, gematria_sum):
	    """Fetch every stored phrase whose gematria equals ``gematria_sum``.

	    Returns a list of ``(words, book, title, chapter, verse, translation)``
	    rows, which is empty when nothing matches. The full result is also
	    logged at INFO level.
	    """
	    query = '''
	        SELECT words, book, title, chapter, verse, translation FROM results WHERE gematria_sum = ?
	    '''
	    c.execute(query, (gematria_sum,))
	    rows = c.fetchall()
	    logging.info(f"Search results: {rows}")
	    return rows

	def run_test():
	    """Load texts and index their single-word phrases in the database.

	    Despite the name, no assertions are made here: the function loads texts
	    with ``process_json_files(1, 39)`` and passes them to
	    ``populate_database``.
	    NOTE(review): the arguments presumably select books 1 through 39;
	    confirm against ``util.process_json_files``.
	    """
	    test_texts = process_json_files(1, 39)
	    # Only single words are indexed. To cover longer phrases, repeat the call
	    # with a larger max_phrase_length (2 and 3 were tried previously).
	    populate_database(test_texts, max_phrase_length=1)

	# Gradio front end: one text input feeding gematria_search_interface, whose
	# two return values fill the "Results" and "Debug Output" boxes.
	iface = gr.Interface(
	    title="Gematria Search in Tanach",
	    description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
	    fn=gematria_search_interface,
	    inputs=gr.Textbox(label="Enter phrase"),
	    outputs=[
	        gr.Textbox(label="Results"),
	        gr.Textbox(label="Debug Output"),
	    ],
	    live=False,  # no live updates while typing
	    allow_flagging="never",  # flagging is turned off
	)

	if __name__ == "__main__":
	    # Create the schema, fill the database, then serve the UI.
	    initialize_database()
	    run_test()
	    iface.launch()