Spaces:
Sleeping
Sleeping
File size: 6,249 Bytes
9be760a cd52d3a 9be760a f16f0ac 9be760a cd52d3a 9be760a f16f0ac 9be760a f16f0ac 9be760a f16f0ac 9be760a f16f0ac 9be760a cd52d3a f16f0ac cd52d3a f16f0ac 9be760a f16f0ac 9be760a f16f0ac 9be760a 5bb3a98 9be760a f16f0ac 9be760a f16f0ac 9be760a 5bb3a98 9be760a f16f0ac 9be760a f16f0ac 9be760a f16f0ac 9be760a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import gradio as gr
import json
import re
import sqlite3
import logging
from util import process_json_files
from gematria import calculate_gematria
from deep_translator import GoogleTranslator
# Configure the root logger: emit INFO and above, printing only the bare message text.
logging.basicConfig(level=logging.INFO, format='%(message)s')
def flatten_text(text):
    """Collapse an arbitrarily nested list of strings into one string.

    Nested lists are flattened recursively and their pieces are joined with
    single spaces. Any value that is not a list is handed back unchanged.
    """
    if not isinstance(text, list):
        return text
    pieces = [flatten_text(entry) for entry in text]
    return " ".join(pieces)
def initialize_database():
    """Create the ``results`` table in ``gematria.db`` if it does not exist.

    The database file is opened relative to the current working directory.
    Calling this function repeatedly is harmless because the statement uses
    ``CREATE TABLE IF NOT EXISTS``.
    """
    conn = sqlite3.connect('gematria.db')
    try:
        c = conn.cursor()
        c.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                book INTEGER,
                title TEXT,
                chapter INTEGER,
                verse INTEGER
            )
        ''')
        conn.commit()
    finally:
        # Always release the file handle, even when the DDL statement fails.
        conn.close()
def insert_phrase_to_db(c, gematria_sum, phrase_candidate, book_id, title, chapter_id, verse_id):
    """Log one phrase and insert it into ``results`` through cursor ``c``.

    ``chapter_id`` and ``verse_id`` are each incremented by one before being
    logged and stored. This function does not commit; that is left to the
    owner of the connection.
    """
    chapter_number = chapter_id + 1
    verse_number = verse_id + 1
    logging.info(f"Inserting: {gematria_sum}, {phrase_candidate}, {book_id}, {title}, {chapter_number}, {verse_number}")
    row = (gematria_sum, phrase_candidate, book_id, title, chapter_number, verse_number)
    c.execute('''
    INSERT INTO results (gematria_sum, words, book, title, chapter, verse)
    VALUES (?, ?, ?, ?, ?, ?)
    ''', row)
def populate_database(tanach_texts, max_phrase_length=1):
    """Insert every phrase of up to ``max_phrase_length`` words into ``gematria.db``.

    Args:
        tanach_texts: Iterable of book mappings. Each is read via
            ``get('text')`` (expected to be a list of chapters, each a list of
            verses) and ``get('title', 'Unknown')``. Books whose ``text`` is
            not a list, and chapters that are not lists, are skipped.
        max_phrase_length: Largest number of consecutive words per phrase.

    Within a chapter, all characters other than U+05D0..U+05EA and the space
    are stripped, the remaining words are concatenated across verses, and
    every window of 1..``max_phrase_length`` consecutive words is stored with
    its gematria value. Phrases may therefore span verse boundaries; each row
    records the verse in which the phrase *starts*.

    All rows are committed once at the end; the connection is closed even if
    an error interrupts population.
    """
    # Compiled once: the same pattern is applied to every verse.
    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")

    conn = sqlite3.connect('gematria.db')
    try:
        c = conn.cursor()
        for book_id, text in enumerate(tanach_texts):
            chapters = text.get('text')
            if not isinstance(chapters, list):
                continue
            title = text.get('title', 'Unknown')
            for chapter_id, chapter in enumerate(chapters):
                if not isinstance(chapter, list):
                    continue

                # Build the chapter's word list verse by verse, remembering
                # which (zero-based) verse each word came from. Previously the
                # word offset was stored in the ``verse`` column.
                words = []
                word_verse_ids = []
                for verse_id, verse in enumerate(chapter):
                    verse_words = non_hebrew.sub("", flatten_text(verse)).split()
                    words.extend(verse_words)
                    word_verse_ids.extend([verse_id] * len(verse_words))

                max_length = min(max_phrase_length, len(words))
                for length in range(1, max_length + 1):
                    for start in range(len(words) - length + 1):
                        window = words[start:start + length]
                        phrase_candidate = " ".join(window)
                        gematria_sum = calculate_gematria("".join(window))
                        # insert_phrase_to_db converts the zero-based chapter
                        # and verse indices to one-based numbers.
                        insert_phrase_to_db(
                            c, gematria_sum, phrase_candidate, book_id, title,
                            chapter_id, word_verse_ids[start],
                        )
        conn.commit()
    finally:
        conn.close()
def search_gematria_in_db(gematria_sum):
    """Return all stored phrases whose gematria value equals ``gematria_sum``.

    Returns:
        A list of ``(words, book, title, chapter, verse)`` tuples read from
        the ``results`` table of ``gematria.db``; empty when nothing matches.
    """
    conn = sqlite3.connect('gematria.db')
    try:
        c = conn.cursor()
        c.execute('''
            SELECT words, book, title, chapter, verse FROM results WHERE gematria_sum = ?
        ''', (gematria_sum,))
        return c.fetchall()
    finally:
        # Close the connection even when the query raises (e.g. missing table).
        conn.close()
def translate_phrases(phrases):
    """Translate each phrase to English and return the results in input order.

    A single ``GoogleTranslator`` (source language auto-detected, target
    ``'en'``) is created and its ``translate`` method is called once per
    phrase.
    """
    to_english = GoogleTranslator(source='auto', target='en')
    return [to_english.translate(item) for item in phrases]
def db(tanach_texts, max_phrase_length=1):
    """Make sure the schema exists, load phrases from ``tanach_texts``, then log completion."""
    initialize_database()
    populate_database(tanach_texts, max_phrase_length=max_phrase_length)
    logging.info("Database successfully created and populated.")
def gematria_search_interface(phrase):
    """Find stored phrases sharing the gematria value of ``phrase``.

    Args:
        phrase: Text entered by the user; spaces are ignored when computing
            its gematria value.

    Returns:
        A ``(result_text, debug_text)`` tuple. ``result_text`` is either a
        short status message (empty input / nothing found) or a formatted
        listing of every match with its translation. ``debug_text`` holds the
        debug messages collected during the call, one per line.
    """
    debug_output = []

    def debug_callback(message):
        debug_output.append(message)
        logging.info(message)

    # Treat None the same as blank input instead of failing on .strip().
    if not phrase or not phrase.strip():
        return "Please enter a phrase.", "\n".join(debug_output)

    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
    debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")

    matching_phrases = search_gematria_in_db(phrase_gematria)
    if not matching_phrases:
        return "No matching phrases found.", "\n".join(debug_output)

    translations = translate_phrases([match[0] for match in matching_phrases])

    # Each row is (words, book, title, chapter, verse). Unpacking by name
    # avoids the off-by-one indices that previously read past the row's end.
    entries = []
    for (words, book, title, chapter, verse), translation in zip(matching_phrases, translations):
        entries.append(
            f"Book: {book} ({title})\nChapter: {chapter}, Verse: {verse}\n"
            f"Phrase: {words}\nTranslation: {translation}\n\n"
        )
    result = "Matching phrases:\n" + "".join(entries)

    # Join the collected messages, not the callback function itself.
    return result, "\n".join(debug_output)
def run_test():
    """Smoke-test database population and lookup with two fixture loads.

    For each fixture the texts are loaded with ``process_json_files``, indexed
    with ``db`` and then searched by gematria value. The test passes when at
    least one returned row equals the expected phrase (ignoring spaces).

    Checking every returned row rather than only the first matters because
    the query has no ``ORDER BY`` and ``gematria.db`` keeps rows from earlier
    runs and other fixtures, so an unrelated phrase with the same value may
    come first.

    Raises:
        AssertionError: If a search returns nothing or never returns the
            expected phrase.
    """
    def contains_phrase(matches, expected):
        # Row layout is (words, book, title, chapter, verse).
        return any(row[0].replace(" ", "") == expected for row in matches)

    test_phrase = "אבגדהוזחטיכלמנסעפצקרשת"
    expected_gematria = 1495

    # First fixture (the assertion messages refer to it as 00.json).
    test_texts_00 = process_json_files(0, 0)
    db(test_texts_00, max_phrase_length=22)  # Index phrases of up to 22 words
    matching_phrases_00 = search_gematria_in_db(expected_gematria)
    assert matching_phrases_00, "No matching phrases found in 00.json."
    assert contains_phrase(matching_phrases_00, test_phrase), f"Found phrase does not match: {matching_phrases_00[0][0]}"
    print("Test successful: The phrase was correctly found and the gematria matches in 00.json.")

    # Second fixture (the assertion messages refer to it as 01.json).
    test_texts_01 = process_json_files(1, 1)
    db(test_texts_01, max_phrase_length=2)  # Index phrases of up to 2 words
    search_phrase_01 = "אתקלך שמעתי"
    expected_gematria_01 = calculate_gematria(search_phrase_01.replace(" ", ""))
    matching_phrases_01 = search_gematria_in_db(expected_gematria_01)
    assert matching_phrases_01, "No matching phrases found in 01.json."
    assert contains_phrase(matching_phrases_01, search_phrase_01.replace(" ", "")), f"Found phrase does not match: {matching_phrases_01[0][0]}"
    print("Test successful: The phrase was correctly found and the gematria matches in 01.json.")
# Gradio UI: one text input wired to gematria_search_interface, with two text
# outputs that receive the (results, debug) tuple the function returns.
# The interface object is built at import time; it is only launched below.
# NOTE(review): gr.inputs.* / gr.outputs.* and a boolean allow_flagging look
# like the legacy Gradio API — confirm against the Gradio version pinned for
# this app before upgrading the dependency.
iface = gr.Interface(
    fn=gematria_search_interface,
    inputs=gr.inputs.Textbox(label="Enter phrase"),
    outputs=[gr.outputs.Textbox(label="Results"), gr.outputs.Textbox(label="Debug Output")],
    title="Gematria Search in Tanach",
    description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
    live=False,  # Disable live update
    allow_flagging=False,  # Disable flagging for simplicity
)

if __name__ == "__main__":
    # Script entry point: run_test() populates and checks the database before
    # the web UI is started.
    run_test()  # Run tests
    iface.launch()
|