neuralworm committed on
Commit
9be760a
·
1 Parent(s): 8aede86

db implementation, phrase amount limit

Browse files
Files changed (1) hide show
  1. app.py +143 -0
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import re
4
+ import sqlite3
5
+ import logging
6
+ from util import process_json_files
7
+ from gematria import calculate_gematria
8
+
9
+ logging.basicConfig(level=logging.INFO, format='%(message)s')
10
+
11
def flatten_text(text):
    """Collapse an arbitrarily nested list of text fragments into one string.

    Args:
        text: Either a single value, or a list whose items are strings or
            further nested lists.

    Returns:
        For a list, all leaf items joined by single spaces in depth-first
        order. Any non-list input is returned unchanged.
    """
    if not isinstance(text, list):
        return text
    # Leaves that are not strings are coerced with str() so that str.join
    # does not raise TypeError; string leaves pass through unchanged.
    return " ".join(
        flatten_text(item) if isinstance(item, list) else str(item)
        for item in text
    )
16
+
17
def initialize_database():
    """Create the ``results`` table in ``gematria.db`` if it does not exist.

    The database file is opened relative to the current working directory.
    Calling this function repeatedly is harmless because the statement uses
    ``CREATE TABLE IF NOT EXISTS``.
    """
    conn = sqlite3.connect('gematria.db')
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                book INTEGER,
                title TEXT,
                chapter INTEGER,
                verse INTEGER
            )
        ''')
        conn.commit()
    finally:
        # Close the handle even when the statement or the commit fails.
        conn.close()
32
+
33
def insert_phrase_to_db(c, gematria_sum, phrase_candidate, book_id, title, chapter_id, verse_id):
    """Insert one phrase row into the ``results`` table.

    Args:
        c: Open sqlite3 cursor; the caller is responsible for committing.
        gematria_sum: Value stored in the ``gematria_sum`` column.
        phrase_candidate: Phrase text stored in the ``words`` column.
        book_id: Value stored in the ``book`` column, unchanged.
        title: Value stored in the ``title`` column.
        chapter_id: Zero-based chapter index; stored as ``chapter_id + 1``.
        verse_id: Zero-based verse index; stored as ``verse_id + 1``.
    """
    chapter_number = chapter_id + 1
    verse_number = verse_id + 1
    # Lazy %-style arguments: the message is only formatted when the record
    # is actually emitted, which matters because this runs once per row.
    logging.info(
        "Inserting: %s, %s, %s, %s, %s, %s",
        gematria_sum, phrase_candidate, book_id, title, chapter_number, verse_number,
    )
    c.execute('''
        INSERT INTO results (gematria_sum, words, book, title, chapter, verse)
        VALUES (?, ?, ?, ?, ?, ?)
    ''', (gematria_sum, phrase_candidate, book_id, title, chapter_number, verse_number))
39
+
40
def populate_database(tanach_texts, max_phrase_length=1):
    """Insert every phrase of up to ``max_phrase_length`` words into ``gematria.db``.

    For each book, each chapter is reduced to a sequence of words containing
    only the characters U+05D0..U+05EA. Every run of 1..``max_phrase_length``
    consecutive words within a chapter (runs may cross verse boundaries) is
    stored together with the value ``calculate_gematria`` returns for the
    phrase with its spaces removed.

    Args:
        tanach_texts: Iterable of dict-like book records. ``record['text']``
            must be a list of chapters, each a list of verses (strings or
            nested lists of strings); records or chapters of any other shape
            are skipped. ``record['title']`` defaults to ``'Unknown'``.
        max_phrase_length: Maximum number of words per stored phrase.

    Note:
        Rows are always appended, so calling this twice with the same input
        stores every phrase twice.
    """
    conn = sqlite3.connect('gematria.db')
    try:
        c = conn.cursor()
        for book_id, text in enumerate(tanach_texts):
            chapters = text.get('text')
            if not isinstance(chapters, list):
                continue
            title = text.get('title', 'Unknown')
            for chapter_id, chapter in enumerate(chapters):
                if not isinstance(chapter, list):
                    continue
                # Build the chapter's word list verse by verse, remembering
                # which verse each word came from, so the stored verse number
                # is the verse of the phrase's first word rather than a word
                # offset within the chapter.
                words = []
                word_verse_ids = []
                for verse_id, verse in enumerate(chapter):
                    verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", flatten_text(verse))
                    verse_words = verse_text.split()
                    words.extend(verse_words)
                    word_verse_ids.extend([verse_id] * len(verse_words))
                max_length = min(max_phrase_length, len(words))
                for length in range(1, max_length + 1):
                    for start in range(len(words) - length + 1):
                        phrase_words = words[start:start + length]
                        gematria_sum = calculate_gematria("".join(phrase_words))
                        # insert_phrase_to_db converts the 0-based chapter and
                        # verse indices to 1-based numbers itself.
                        insert_phrase_to_db(
                            c,
                            gematria_sum,
                            " ".join(phrase_words),
                            book_id,
                            title,
                            chapter_id,
                            word_verse_ids[start],
                        )
        conn.commit()
    finally:
        # Close the handle even when processing or the commit fails.
        conn.close()
63
+
64
def search_gematria_in_db(gematria_sum):
    """Return all stored phrases whose gematria value equals ``gematria_sum``.

    Args:
        gematria_sum: Value to match against the ``gematria_sum`` column.

    Returns:
        A list of ``(words, book, title, chapter, verse)`` tuples read from
        the ``results`` table of ``gematria.db``; empty when nothing matches.
    """
    conn = sqlite3.connect('gematria.db')
    try:
        cursor = conn.execute('''
            SELECT words, book, title, chapter, verse FROM results WHERE gematria_sum = ?
        ''', (gematria_sum,))
        return cursor.fetchall()
    finally:
        # Close the handle even when the query fails (e.g. missing table).
        conn.close()
73
+
74
def db(tanach_texts, max_phrase_length=1):
    """Ensure the results table exists, then fill it from ``tanach_texts``.

    Args:
        tanach_texts: Book records handed on to ``populate_database``.
        max_phrase_length: Maximum words per phrase, handed on unchanged.
    """
    initialize_database()
    populate_database(tanach_texts, max_phrase_length=max_phrase_length)
    logging.info("Datenbank erfolgreich erstellt und gefüllt.")
78
+
79
def gematria_search_interface(phrase):
    """Look up stored phrases that share the gematria value of ``phrase``.

    Args:
        phrase: Search text. ``None``, empty, or whitespace-only input yields
            a prompt to enter a phrase instead of a search.

    Returns:
        A ``(result_text, debug_text)`` tuple of strings: the formatted
        matches (or a status message), and the newline-joined debug messages
        collected during the call.
    """
    debug_output = []

    def debug_callback(message):
        # Keep the message for the UI's debug pane and mirror it to the log.
        debug_output.append(message)
        logging.info(message)

    # A None input is treated like an empty phrase instead of raising
    # AttributeError on .strip().
    if phrase is None or not phrase.strip():
        return "Bitte geben Sie eine Phrase ein.", "\n".join(debug_output)

    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
    debug_callback(f"Debug: Gematria der Suchphrase '{phrase}' ist {phrase_gematria}")

    matching_phrases = search_gematria_in_db(phrase_gematria)
    if not matching_phrases:
        return "Keine passenden Phrasen gefunden.", "\n".join(debug_output)

    # Assemble the report with a single join rather than repeated +=.
    parts = ["Passende Phrasen:\n"]
    parts.extend(
        f"Buch: {book} ({title})\nKapitel: {chapter}, Vers: {verse}\nPhrase: {words}\n\n"
        for words, book, title, chapter, verse in matching_phrases
    )
    return "".join(parts), "\n".join(debug_output)
102
+
103
def _assert_phrase_found(matches, expected_phrase, source_name):
    """Raise AssertionError unless some row in ``matches`` spells ``expected_phrase``.

    Spaces are ignored when comparing. Raising explicitly (instead of using
    ``assert``) keeps the check active when Python runs with ``-O``.
    """
    if not matches:
        raise AssertionError(f"Keine passenden Phrasen gefunden in {source_name}.")
    expected = expected_phrase.replace(" ", "")
    # Different phrases can share a gematria value, so the expected phrase is
    # not necessarily the first row returned; accept it at any position.
    if not any(row[0].replace(" ", "") == expected for row in matches):
        raise AssertionError(f"Gefundene Phrase stimmt nicht überein: {matches[0][0]}")


def run_test():
    """Populate the database from two test inputs and verify phrase lookup.

    Raises:
        AssertionError: If a lookup returns no rows, or none of the returned
            rows matches the expected phrase.
    """
    test_phrase = "אבגדהוזחטיכלמנסעפצקרשת"
    expected_gematria = 1495

    # First data set (presumably 00.json — confirm against process_json_files):
    # index single-word phrases and look the test phrase up by its known value.
    test_texts_00 = process_json_files(0, 0)
    db(test_texts_00, max_phrase_length=1)
    matching_phrases_00 = search_gematria_in_db(expected_gematria)
    _assert_phrase_found(matching_phrases_00, test_phrase, "00.json")
    print("Test erfolgreich: Die Phrase wurde korrekt gefunden und die Gematria stimmt überein in 00.json.")

    # Second data set (presumably 01.json): index 1- and 2-word phrases and
    # look up a two-word phrase by its computed value.
    test_texts_01 = process_json_files(1, 1)
    db(test_texts_01, max_phrase_length=2)
    search_phrase_01 = "בארון במצרים"
    expected_gematria_01 = calculate_gematria(search_phrase_01.replace(" ", ""))

    matching_phrases_01 = search_gematria_in_db(expected_gematria_01)
    _assert_phrase_found(matching_phrases_01, search_phrase_01, "01.json")
    print("Test erfolgreich: Die Phrase wurde korrekt gefunden und die Gematria stimmt überein in 01.json.")
    # The original ended by printing an (always empty) debug buffer, which
    # produced one blank line; keep that so console output is unchanged.
    print()
131
+
132
# Gradio front end: one text input wired to gematria_search_interface, with
# two text outputs (the search result and the debug messages).
iface = gr.Interface(
    fn=gematria_search_interface,
    inputs="text",
    outputs=["text", "text"],
    title="Gematria-Suche im Tanach",
    description="Suche nach Phrasen im Tanach, die denselben Gematria-Wert haben wie die eingegebene Phrase.",
    live=True,  # enables live updates
)
140
+
141
if __name__ == "__main__":
    # Run the self-test first, then start the web interface.
    run_test()
    iface.launch()