neuralworm committed on
Commit
a13f26e
·
1 Parent(s): 84b09e2

result fix

Browse files
Files changed (1) hide show
  1. app.py +227 -227
app.py CHANGED
@@ -18,252 +18,252 @@ translator = None
18
  book_names = {} # Dictionary to store book names
19
 
def flatten_text(text):
    """Flatten arbitrarily nested lists of strings into one space-joined string.

    Args:
        text: A string, or a (possibly nested) list whose leaves are strings.

    Returns:
        The leaves joined by single spaces when ``text`` is a list; otherwise
        ``text`` itself, unchanged.
    """
    if not isinstance(text, list):
        return text

    pieces = []
    for item in text:
        if item is None:
            # A None leaf carries no text; skip it rather than let str.join
            # raise TypeError.
            continue
        if isinstance(item, list):
            pieces.append(flatten_text(item))
        else:
            # Coerce stray non-string leaves (e.g. numbers) so join cannot fail.
            pieces.append(item if isinstance(item, str) else str(item))
    return " ".join(pieces)
25
 
def initialize_database():
    """Open ``gematria.db`` and create the schema if it does not exist yet.

    The connection is stored in the module-level ``conn``. Two tables are
    created: ``results`` (one row per phrase occurrence) and
    ``processed_books`` (the largest phrase length already indexed per book).

    Note that ``CREATE TABLE IF NOT EXISTS`` leaves a table created by an
    earlier version of this schema untouched.
    """
    global conn
    conn = sqlite3.connect('gematria.db')
    c = conn.cursor()
    # The key covers the phrase together with its location, so the same phrase
    # can be stored once per verse and distinct phrases sharing a Gematria
    # value inside one verse do not collide. Leading with gematria_sum lets
    # the primary-key index serve lookups by Gematria value.
    c.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book INTEGER,
            chapter INTEGER,
            verse INTEGER,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse)
        )
    ''')
    # Translations are read and written by `words`; index that column so those
    # statements do not scan the whole table.
    c.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_words ON results (words)
    ''')
    c.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book INTEGER PRIMARY KEY,
            max_phrase_length INTEGER
        )
    ''')
    conn.commit()
    logging.info("Database initialized.")
50
 
def initialize_translator():
    """Create the Hebrew-to-English translator and publish it as ``translator``.

    The instance is built with source language code ``'iw'`` and target
    ``'en'`` and stored in the module-level ``translator`` variable.
    """
    global translator
    hebrew_to_english = GoogleTranslator(source='iw', target='en')
    translator = hebrew_to_english
    logging.info("Translator initialized.")
56
 
def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
    """Store one phrase occurrence in ``results`` and commit immediately.

    Args:
        gematria_sum: Gematria value of the phrase.
        phrase_candidate: The phrase text, stored in the ``words`` column.
        book, chapter, verse: Location of the phrase.

    A row rejected with ``sqlite3.IntegrityError`` is not inserted; the
    rejection is only logged at debug level.
    """
    global conn
    location = f"{book}:{chapter}:{verse}"
    row = (gematria_sum, phrase_candidate, book, chapter, verse)
    cursor = conn.cursor()
    try:
        cursor.execute('''
            INSERT INTO results (gematria_sum, words, book, chapter, verse)
            VALUES (?, ?, ?, ?, ?)
        ''', row)
        conn.commit()
    except sqlite3.IntegrityError:
        logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {location}")
    else:
        logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {location}")
70
 
def _iter_phrase_rows(book_id, title, chapters, max_phrase_length):
    """Yield one (gematria_sum, words, book, chapter, verse) row per phrase of a book."""
    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")
    for chapter_id, chapter in enumerate(chapters):
        if not isinstance(chapter, list):
            logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
            continue
        for verse_id, verse in enumerate(chapter):
            verse_text = flatten_text(verse)
            if not isinstance(verse_text, str):
                # A verse that is not text (e.g. None) has nothing to index.
                continue
            # Keep only Hebrew letters and spaces; split() then collapses any
            # run of spaces, so no separate whitespace normalisation is needed.
            # NOTE(review): other characters are deleted, not replaced by a
            # space, so words joined by punctuation such as maqaf (U+05BE)
            # merge into one token - confirm this is intended.
            words = non_hebrew.sub("", verse_text).split()
            for length in range(1, max_phrase_length + 1):
                for start in range(len(words) - length + 1):
                    phrase_words = words[start:start + length]
                    yield (
                        calculate_gematria("".join(phrase_words)),
                        " ".join(phrase_words),
                        book_id,
                        chapter_id + 1,  # chapters and verses are stored 1-based
                        verse_id + 1,
                    )


def populate_database(tanach_texts, max_phrase_length=1):
    """Index every phrase of up to ``max_phrase_length`` words with its Gematria value.

    Args:
        tanach_texts: Iterable of ``(book_id, text)`` pairs, where ``text`` is
            a dict holding a ``'text'`` list of chapters (each a list of
            verses) and optionally a ``'title'``.
        max_phrase_length: Longest phrase, in words, to index per verse.

    Books already recorded in ``processed_books`` with an equal or larger
    phrase length are not re-indexed, but their titles are still registered
    in ``book_names`` so that results can be labelled after a restart.
    Each book is written in a single transaction: its rows and its
    ``processed_books`` entry are committed together, or rolled back together
    if indexing fails.
    """
    global conn, book_names
    logging.info("Populating database...")
    c = conn.cursor()

    for book_id, text in tanach_texts:
        if not isinstance(text, dict) or not isinstance(text.get('text'), list):
            logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
            continue

        # Register the title before the "already processed" check; otherwise a
        # fully populated database leaves book_names empty on the next start.
        title = text.get('title', 'Unknown')
        book_names[book_id] = title

        c.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
        result = c.fetchone()
        if result and result[0] >= max_phrase_length:
            logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
            continue

        logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
        try:
            # OR IGNORE skips rows that already exist, and one commit per book
            # replaces one commit per phrase.
            c.executemany('''
                INSERT OR IGNORE INTO results (gematria_sum, words, book, chapter, verse)
                VALUES (?, ?, ?, ?, ?)
            ''', _iter_phrase_rows(book_id, title, text['text'], max_phrase_length))
            c.execute('''
                INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                VALUES (?, ?)
            ''', (book_id, max_phrase_length))
            conn.commit()
        except Exception:
            # Do not leave a half-indexed book pending in the open transaction.
            conn.rollback()
            raise
    logging.info("Database population complete.")
113
 
def get_translation(phrase):
    """Return the English translation of a Hebrew phrase, caching it in the DB.

    Args:
        phrase: The Hebrew phrase as stored in the ``words`` column.

    Returns:
        The cached translation if one exists, otherwise the result of
        ``translate_and_store(phrase)``. A successful new translation is
        written to every ``results`` row holding this phrase.
    """
    global conn
    # Sentinel returned by translate_and_store() when all attempts failed.
    failed = "[Translation Error]"

    c = conn.cursor()
    # Look for any row of this phrase that already carries a usable
    # translation; rows with NULL, empty or failed translations do not count.
    c.execute('''
        SELECT translation FROM results
        WHERE words = ?
          AND translation IS NOT NULL
          AND translation NOT IN ('', ?)
        LIMIT 1
    ''', (phrase, failed))
    result = c.fetchone()
    if result and result[0]:
        return result[0]

    translation = translate_and_store(phrase)
    if translation != failed:
        # Only cache real translations, so a temporary outage is retried on
        # the next request instead of being stored permanently.
        c.execute('''
            UPDATE results
            SET translation = ?
            WHERE words = ?
        ''', (translation, phrase))
        conn.commit()
    return translation
134
 
135
 
def translate_and_store(phrase):
    """Translate ``phrase`` with the global translator, retrying on failure.

    Up to three attempts are made. Despite its name, this function does not
    write anything to the database; it only returns the translation.

    Returns:
        The translated text, or ``"[Translation Error]"`` when every attempt
        raised one of the handled translation/connection errors.
    """
    global translator
    max_retries = 3  # You can adjust the number of retries

    for attempt in range(1, max_retries + 1):
        try:
            translated = translator.translate(phrase)
        except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
                exceptions.ServerException, exceptions.RequestError,
                requests.exceptions.ConnectionError) as err:
            logging.warning(f"Error translating phrase '{phrase}': {err}. Retrying... ({attempt}/{max_retries})")
            continue
        logging.debug(f"Translated phrase: {translated}")
        return translated

    logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
    return "[Translation Error]"
153
 
def search_gematria_in_db(gematria_sum):
    """Fetch every stored phrase whose Gematria value equals ``gematria_sum``.

    Returns:
        A list of ``(words, book, chapter, verse)`` tuples read from the
        ``results`` table through the global connection.
    """
    global conn
    query = '''
        SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
    '''
    matches = conn.cursor().execute(query, (gematria_sum,)).fetchall()
    logging.debug(f"Found {len(matches)} matching phrases for Gematria: {gematria_sum}")
    return matches
164
 
def gematria_search_interface(phrase):
    """Gradio handler: list stored phrases sharing the Gematria value of ``phrase``.

    Args:
        phrase: Text entered by the user.

    Returns:
        An HTML string (CSS followed by one card per match, grouped by book),
        or a plain message when ``phrase`` is empty or nothing matches.
    """
    global conn, book_names
    import html  # local import so this function does not depend on file-level imports

    if not phrase or not phrase.strip():
        return "Please enter a phrase."

    # A fresh connection is opened per call and published through the global
    # so the helper functions use it. By default a sqlite3 connection may only
    # be used from the thread that created it.
    # NOTE(review): presumably the handler runs on a different thread than
    # run_app() - confirm.
    conn = sqlite3.connect('gematria.db')
    try:
        phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
        logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")

        matching_phrases = search_gematria_in_db(phrase_gematria)
        if not matching_phrases:
            return "No matching phrases found."

        # Sort results by book, chapter, and verse
        sorted_phrases = sorted(matching_phrases, key=lambda x: (x[1], x[2], x[3]))

        # Group results by book
        results_by_book = defaultdict(list)
        for words, book, chapter, verse in sorted_phrases:
            results_by_book[book].append((words, chapter, verse))

        # Format results for display; every value taken from the database or
        # the translator is escaped before it is placed into the markup.
        results = ["<div class='results-container'>"]
        for book, phrases in results_by_book.items():
            book_name_english = str(book_names.get(book, 'Unknown'))
            results.append(f"<h4>Book: {html.escape(book_name_english)}</h4>")
            for words, chapter, verse in phrases:
                translation = get_translation(words)
                link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}"

                results.append(f"""
                <div class='result-item'>
                    <p>Chapter: {chapter}, Verse: {verse}</p>
                    <p class='hebrew-phrase'>Hebrew Phrase: {html.escape(str(words))}</p>
                    <p>Translation: {html.escape(str(translation))}</p>
                    <a href='{html.escape(link)}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
                </div>
                """)
        results.append("</div>")  # Close results-container div
    finally:
        # Close on every exit path, including "no matches" and errors.
        conn.close()

    # Add CSS styling
    style = """
    <style>
        .results-container {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
            gap: 20px;
        }

        .result-item {
            border: 1px solid #ccc;
            padding: 15px;
            border-radius: 5px;
            box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
        }

        .hebrew-phrase {
            font-family: 'SBL Hebrew', 'Ezra SIL', serif;
            direction: rtl;
        }

        .bible-link {
            display: block;
            margin-top: 10px;
            color: #007bff;
            text-decoration: none;
        }
    </style>
    """

    return style + "\n".join(results)  # Concatenate style and results
243
 
def run_app():
    """Prepare the database and translator, index the texts, and launch Gradio."""
    initialize_database()
    initialize_translator()

    # Pre-populate the database: (first book, last book, max phrase length).
    # Books 1-39 are indexed with single words; books 1 and 27 are then
    # extended to phrases of up to four words.
    indexing_passes = ((1, 39, 1), (1, 1, 4), (27, 27, 4))
    for first_book, last_book, phrase_length in indexing_passes:
        tanach_texts = process_json_files(first_book, last_book)
        populate_database(tanach_texts, max_phrase_length=phrase_length)

    interface_options = dict(
        fn=gematria_search_interface,
        inputs=gr.Textbox(label="Enter phrase"),
        outputs=gr.HTML(label="Results"),
        title="Gematria Search in Tanach",
        description="Search for phrases in the Tanach that have the same Gematria value.",
        live=False,
        allow_flagging="never",
    )
    gr.Interface(**interface_options).launch()
267
 
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_app()
 
18
  book_names = {} # Dictionary to store book names
19
 
def flatten_text(text):
    """Flatten arbitrarily nested lists of strings into one space-joined string.

    Args:
        text: A string, or a (possibly nested) list whose leaves are strings.

    Returns:
        The leaves joined by single spaces when ``text`` is a list; otherwise
        ``text`` itself, unchanged.
    """
    if not isinstance(text, list):
        return text

    pieces = []
    for item in text:
        if item is None:
            # A None leaf carries no text; skip it rather than let str.join
            # raise TypeError.
            continue
        if isinstance(item, list):
            pieces.append(flatten_text(item))
        else:
            # Coerce stray non-string leaves (e.g. numbers) so join cannot fail.
            pieces.append(item if isinstance(item, str) else str(item))
    return " ".join(pieces)
25
 
def initialize_database():
    """Open ``gematria.db`` and create the schema if it does not exist yet.

    The connection is stored in the module-level ``conn``. Two tables are
    created: ``results`` (one row per phrase occurrence) and
    ``processed_books`` (the largest phrase length already indexed per book).

    Note that ``CREATE TABLE IF NOT EXISTS`` leaves a table created by an
    earlier version of this schema untouched.
    """
    global conn
    conn = sqlite3.connect('gematria.db')
    c = conn.cursor()
    # The key covers the phrase together with its location, so the same phrase
    # can be stored once per verse and distinct phrases sharing a Gematria
    # value inside one verse do not collide. Leading with gematria_sum lets
    # the primary-key index serve lookups by Gematria value.
    c.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book INTEGER,
            chapter INTEGER,
            verse INTEGER,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse)
        )
    ''')
    # Translations are read and written by `words`; index that column so those
    # statements do not scan the whole table.
    c.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_words ON results (words)
    ''')
    c.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book INTEGER PRIMARY KEY,
            max_phrase_length INTEGER
        )
    ''')
    conn.commit()
    logging.info("Database initialized.")
50
 
def initialize_translator():
    """Create the Hebrew-to-English translator and publish it as ``translator``.

    The instance is built with source language code ``'iw'`` and target
    ``'en'`` and stored in the module-level ``translator`` variable.
    """
    global translator
    hebrew_to_english = GoogleTranslator(source='iw', target='en')
    translator = hebrew_to_english
    logging.info("Translator initialized.")
56
 
def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
    """Store one phrase occurrence in ``results`` and commit immediately.

    Args:
        gematria_sum: Gematria value of the phrase.
        phrase_candidate: The phrase text, stored in the ``words`` column.
        book, chapter, verse: Location of the phrase.

    A row rejected with ``sqlite3.IntegrityError`` is not inserted; the
    rejection is only logged at debug level.
    """
    global conn
    location = f"{book}:{chapter}:{verse}"
    row = (gematria_sum, phrase_candidate, book, chapter, verse)
    cursor = conn.cursor()
    try:
        cursor.execute('''
            INSERT INTO results (gematria_sum, words, book, chapter, verse)
            VALUES (?, ?, ?, ?, ?)
        ''', row)
        conn.commit()
    except sqlite3.IntegrityError:
        logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {location}")
    else:
        logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {location}")
70
 
def _iter_phrase_rows(book_id, title, chapters, max_phrase_length):
    """Yield one (gematria_sum, words, book, chapter, verse) row per phrase of a book."""
    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")
    for chapter_id, chapter in enumerate(chapters):
        if not isinstance(chapter, list):
            logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
            continue
        for verse_id, verse in enumerate(chapter):
            verse_text = flatten_text(verse)
            if not isinstance(verse_text, str):
                # A verse that is not text (e.g. None) has nothing to index.
                continue
            # Keep only Hebrew letters and spaces; split() then collapses any
            # run of spaces, so no separate whitespace normalisation is needed.
            # NOTE(review): other characters are deleted, not replaced by a
            # space, so words joined by punctuation such as maqaf (U+05BE)
            # merge into one token - confirm this is intended.
            words = non_hebrew.sub("", verse_text).split()
            for length in range(1, max_phrase_length + 1):
                for start in range(len(words) - length + 1):
                    phrase_words = words[start:start + length]
                    yield (
                        calculate_gematria("".join(phrase_words)),
                        " ".join(phrase_words),
                        book_id,
                        chapter_id + 1,  # chapters and verses are stored 1-based
                        verse_id + 1,
                    )


def populate_database(tanach_texts, max_phrase_length=1):
    """Index every phrase of up to ``max_phrase_length`` words with its Gematria value.

    Args:
        tanach_texts: Iterable of ``(book_id, text)`` pairs, where ``text`` is
            a dict holding a ``'text'`` list of chapters (each a list of
            verses) and optionally a ``'title'``.
        max_phrase_length: Longest phrase, in words, to index per verse.

    Books already recorded in ``processed_books`` with an equal or larger
    phrase length are not re-indexed, but their titles are still registered
    in ``book_names`` so that results can be labelled after a restart.
    Each book is written in a single transaction: its rows and its
    ``processed_books`` entry are committed together, or rolled back together
    if indexing fails.
    """
    global conn, book_names
    logging.info("Populating database...")
    c = conn.cursor()

    for book_id, text in tanach_texts:
        if not isinstance(text, dict) or not isinstance(text.get('text'), list):
            logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
            continue

        # Register the title before the "already processed" check; otherwise a
        # fully populated database leaves book_names empty on the next start.
        title = text.get('title', 'Unknown')
        book_names[book_id] = title

        c.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
        result = c.fetchone()
        if result and result[0] >= max_phrase_length:
            logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
            continue

        logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
        try:
            # OR IGNORE skips rows that already exist, and one commit per book
            # replaces one commit per phrase.
            c.executemany('''
                INSERT OR IGNORE INTO results (gematria_sum, words, book, chapter, verse)
                VALUES (?, ?, ?, ?, ?)
            ''', _iter_phrase_rows(book_id, title, text['text'], max_phrase_length))
            c.execute('''
                INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                VALUES (?, ?)
            ''', (book_id, max_phrase_length))
            conn.commit()
        except Exception:
            # Do not leave a half-indexed book pending in the open transaction.
            conn.rollback()
            raise
    logging.info("Database population complete.")
113
 
def get_translation(phrase):
    """Return the English translation of a Hebrew phrase, caching it in the DB.

    Args:
        phrase: The Hebrew phrase as stored in the ``words`` column.

    Returns:
        The cached translation if one exists, otherwise the result of
        ``translate_and_store(phrase)``. A successful new translation is
        written to every ``results`` row holding this phrase.
    """
    global conn
    # Sentinel returned by translate_and_store() when all attempts failed.
    failed = "[Translation Error]"

    c = conn.cursor()
    # Look for any row of this phrase that already carries a usable
    # translation; rows with NULL, empty or failed translations do not count.
    c.execute('''
        SELECT translation FROM results
        WHERE words = ?
          AND translation IS NOT NULL
          AND translation NOT IN ('', ?)
        LIMIT 1
    ''', (phrase, failed))
    result = c.fetchone()
    if result and result[0]:
        return result[0]

    translation = translate_and_store(phrase)
    if translation != failed:
        # Only cache real translations, so a temporary outage is retried on
        # the next request instead of being stored permanently.
        c.execute('''
            UPDATE results
            SET translation = ?
            WHERE words = ?
        ''', (translation, phrase))
        conn.commit()
    return translation
 
 
 
 
 
 
 
 
 
134
 
135
 
def translate_and_store(phrase):
    """Translate ``phrase`` with the global translator, retrying on failure.

    Up to three attempts are made. Despite its name, this function does not
    write anything to the database; it only returns the translation.

    Returns:
        The translated text, or ``"[Translation Error]"`` when every attempt
        raised one of the handled translation/connection errors.
    """
    global translator
    max_retries = 3  # You can adjust the number of retries

    for attempt in range(1, max_retries + 1):
        try:
            translated = translator.translate(phrase)
        except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
                exceptions.ServerException, exceptions.RequestError,
                requests.exceptions.ConnectionError) as err:
            logging.warning(f"Error translating phrase '{phrase}': {err}. Retrying... ({attempt}/{max_retries})")
            continue
        logging.debug(f"Translated phrase: {translated}")
        return translated

    logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
    return "[Translation Error]"
153
 
def search_gematria_in_db(gematria_sum):
    """Fetch every stored phrase whose Gematria value equals ``gematria_sum``.

    Returns:
        A list of ``(words, book, chapter, verse)`` tuples read from the
        ``results`` table through the global connection.
    """
    global conn
    query = '''
        SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
    '''
    matches = conn.cursor().execute(query, (gematria_sum,)).fetchall()
    logging.debug(f"Found {len(matches)} matching phrases for Gematria: {gematria_sum}")
    return matches
164
 
def gematria_search_interface(phrase):
    """Gradio handler: list stored phrases sharing the Gematria value of ``phrase``.

    Args:
        phrase: Text entered by the user.

    Returns:
        An HTML string (CSS followed by one card per match, grouped by book),
        or a plain message when ``phrase`` is empty or nothing matches.
    """
    global conn, book_names
    import html  # local import so this function does not depend on file-level imports

    if not phrase or not phrase.strip():
        return "Please enter a phrase."

    # A fresh connection is opened per call and published through the global
    # so the helper functions use it. By default a sqlite3 connection may only
    # be used from the thread that created it.
    # NOTE(review): presumably the handler runs on a different thread than
    # run_app() - confirm.
    conn = sqlite3.connect('gematria.db')
    try:
        phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
        logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")

        matching_phrases = search_gematria_in_db(phrase_gematria)
        if not matching_phrases:
            return "No matching phrases found."

        # Sort results by book, chapter, and verse
        sorted_phrases = sorted(matching_phrases, key=lambda x: (x[1], x[2], x[3]))

        # Group results by book
        results_by_book = defaultdict(list)
        for words, book, chapter, verse in sorted_phrases:
            results_by_book[book].append((words, chapter, verse))

        # Format results for display; every value taken from the database or
        # the translator is escaped before it is placed into the markup.
        results = ["<div class='results-container'>"]
        for book, phrases in results_by_book.items():
            book_name_english = str(book_names.get(book, 'Unknown'))
            results.append(f"<h4>Book: {html.escape(book_name_english)}</h4>")
            for words, chapter, verse in phrases:
                translation = get_translation(words)
                link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}"

                results.append(f"""
                <div class='result-item'>
                    <p>Chapter: {chapter}, Verse: {verse}</p>
                    <p class='hebrew-phrase'>Hebrew Phrase: {html.escape(str(words))}</p>
                    <p>Translation: {html.escape(str(translation))}</p>
                    <a href='{html.escape(link)}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
                </div>
                """)
        results.append("</div>")  # Close results-container div
    finally:
        # Close on every exit path, including "no matches" and errors.
        conn.close()

    # Add CSS styling
    style = """
    <style>
        .results-container {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
            gap: 20px;
        }

        .result-item {
            border: 1px solid #ccc;
            padding: 15px;
            border-radius: 5px;
            box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
        }

        .hebrew-phrase {
            font-family: 'SBL Hebrew', 'Ezra SIL', serif;
            direction: rtl;
        }

        .bible-link {
            display: block;
            margin-top: 10px;
            color: #007bff;
            text-decoration: none;
        }
    </style>
    """

    return style + "\n".join(results)  # Concatenate style and results
243
 
def run_app():
    """Prepare the database and translator, index the texts, and launch Gradio."""
    initialize_database()
    initialize_translator()

    # Pre-populate the database: books 1-39 with phrases of up to four words.
    all_books = process_json_files(1, 39)
    populate_database(all_books, max_phrase_length=4)

    interface_options = dict(
        fn=gematria_search_interface,
        inputs=gr.Textbox(label="Enter phrase"),
        outputs=gr.HTML(label="Results"),
        title="Gematria Search in Tanach",
        description="Search for phrases in the Tanach that have the same Gematria value.",
        live=False,
        allow_flagging="never",
    )
    gr.Interface(**interface_options).launch()
267
 
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_app()