neuralworm committed on
Commit
c160986
1 Parent(s): 28c065d

saving translations, fixes

Browse files
Files changed (2) hide show
  1. app.py +80 -72
  2. gematria.db +2 -2
app.py CHANGED
@@ -22,6 +22,7 @@ def initialize_database():
22
  CREATE TABLE IF NOT EXISTS results (
23
  gematria_sum INTEGER,
24
  words TEXT,
 
25
  book INTEGER,
26
  title TEXT,
27
  chapter INTEGER,
@@ -32,68 +33,68 @@ def initialize_database():
32
  conn.commit()
33
  conn.close()
34
 
35
- def insert_phrase_to_db(c, gematria_sum, phrase_candidate, book_id, title, chapter_id, verse_id):
36
  try:
37
- logging.info(f"Inserting: {gematria_sum}, {phrase_candidate}, {book_id}, {title}, {chapter_id + 1}, {verse_id + 1}")
38
  c.execute('''
39
- INSERT INTO results (gematria_sum, words, book, title, chapter, verse)
40
- VALUES (?, ?, ?, ?, ?, ?)
41
- ''', (gematria_sum, phrase_candidate, book_id, title, chapter_id + 1, verse_id + 1))
42
  except sqlite3.IntegrityError:
43
- logging.info(f"Entry already exists: {gematria_sum}, {phrase_candidate}, {book_id}, {title}, {chapter_id + 1}, {verse_id + 1}")
44
 
45
  def populate_database(tanach_texts, max_phrase_length=1):
46
  conn = sqlite3.connect('gematria.db')
47
  c = conn.cursor()
48
  for book_id, text in enumerate(tanach_texts):
49
- if not isinstance(text.get('text'), list):
 
50
  continue
51
  title = text.get('title', 'Unknown')
52
  chapters = text['text']
53
  for chapter_id, chapter in enumerate(chapters):
54
  if not isinstance(chapter, list):
 
55
  continue
56
- chapter_text = ' '.join([flatten_text(verse) for verse in chapter])
57
- chapter_text = re.sub(r"[^\u05D0-\u05EA ]+", "", chapter_text)
58
- chapter_text = re.sub(r" +", " ", chapter_text)
59
- words = chapter_text.split()
60
- max_length = min(max_phrase_length, len(words))
61
- for length in range(1, max_length + 1):
62
- for start in range(len(words) - length + 1):
63
- phrase_candidate = " ".join(words[start:start + length])
64
- gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
65
- insert_phrase_to_db(c, gematria_sum, phrase_candidate, book_id, title, chapter_id, start + 1)
 
66
  conn.commit()
67
  conn.close()
68
 
69
- def search_gematria_in_db(gematria_sum):
70
- conn = sqlite3.connect('gematria.db')
71
- c = conn.cursor()
72
  c.execute('''
73
- SELECT words, book, title, chapter, verse FROM results WHERE gematria_sum = ?
74
- ''', (gematria_sum,))
75
- results = c.fetchall()
76
- conn.close()
77
- logging.info(f"Search results: {results}")
78
- return results
79
-
80
- def translate_phrases(phrases):
81
- translator = GoogleTranslator(source='auto', target='en')
82
- translated_phrases = []
83
- for phrase in phrases:
84
- try:
85
- logging.info(f"Translating phrase: {phrase}")
86
- translated_phrases.append(translator.translate(phrase))
87
- logging.info(f"Translated phrase: {translated_phrases[-1]}")
88
- except (exceptions.TranslationNotFound, exceptions.NotValidPayload, exceptions.ServerException, exceptions.RequestError) as e:
89
- logging.error(f"Error translating phrase '{phrase}': {e}")
90
- translated_phrases.append("[Translation Error]")
91
- return translated_phrases
92
-
93
- def db(tanach_texts, max_phrase_length=1):
94
- initialize_database()
95
- populate_database(tanach_texts, max_phrase_length)
96
- logging.info("Database successfully created and populated.")
97
 
98
  def gematria_search_interface(phrase):
99
  debug_output = []
@@ -108,57 +109,63 @@ def gematria_search_interface(phrase):
108
  phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
109
  debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")
110
 
111
- matching_phrases = search_gematria_in_db(phrase_gematria)
 
 
112
 
113
  if not matching_phrases:
 
114
  return "No matching phrases found.", "\n".join(debug_output)
115
 
116
  # Sort matching phrases by book, chapter, and verse
117
  matching_phrases.sort(key=lambda x: (x[1], x[3], x[4]))
118
 
119
- phrases = [match[0] for match in matching_phrases]
120
- debug_callback(f"Debug: Phrases to be translated: {phrases}")
121
- translations = translate_phrases(phrases)
122
- debug_callback(f"Debug: Translations: {translations}")
123
-
124
  result = "Matching phrases:\n"
125
- for match, translation in zip(matching_phrases, translations):
126
- if len(match) != 5:
127
- debug_callback(f"Error: Expected tuple of length 5, but got {len(match)}: {match}")
128
  continue
129
- result += f"Book: {match[2]} ({match[3]})\nChapter: {match[3]}, Verse: {match[4]}\nPhrase: {match[0]}\nTranslation: {translation}\n\n"
130
-
 
 
 
 
131
  return result, "\n".join(debug_output)
132
 
 
 
 
 
 
 
 
 
133
  def run_test():
134
  debug_output = []
135
- test_phrase = "אבגדהוזחטיכלמנסעפצקרשת"
136
  expected_gematria = 1495
137
 
138
  def debug_callback(message):
139
  debug_output.append(message)
140
  logging.info(message)
141
 
142
- # Load the test JSON contents for 00.json
143
- test_texts_00 = process_json_files(0, 0)
144
- db(test_texts_00, max_phrase_length=22) # Populate the database with 1-word phrases
145
- matching_phrases_00 = search_gematria_in_db(expected_gematria)
146
- assert matching_phrases_00, "No matching phrases found in 00.json."
147
- assert matching_phrases_00[0][0].replace(" ", "") == test_phrase, f"Found phrase does not match: {matching_phrases_00[0][0]}"
148
- print("Test successful: The phrase was correctly found and the gematria matches in 00.json.")
149
-
150
  # Load the test JSON contents for 01.json
 
151
  test_texts_01 = process_json_files(1, 1)
152
- db(test_texts_01, max_phrase_length=2) # Populate the database with 1-word phrases
153
- search_phrase_01 = "אלוף דישן"
154
- expected_gematria_01 = calculate_gematria(search_phrase_01.replace(" ", ""))
155
-
156
- matching_phrases_01 = search_gematria_in_db(expected_gematria_01)
157
- assert matching_phrases_01, "No matching phrases found in 01.json."
158
- assert matching_phrases_01[0][0].replace(" ", "") == search_phrase_01.replace(" ", ""), f"Found phrase does not match: {matching_phrases_01[0][0]}"
159
  print("Test successful: The phrase was correctly found and the gematria matches in 01.json.")
160
  print("\n".join(debug_output))
161
 
 
 
 
162
  iface = gr.Interface(
163
  fn=gematria_search_interface,
164
  inputs=gr.Textbox(label="Enter phrase"),
@@ -166,9 +173,10 @@ iface = gr.Interface(
166
  title="Gematria Search in Tanach",
167
  description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
168
  live=False, # Disable live update
169
- allow_flagging=False, # Disable flagging for simplicity
170
  )
171
 
172
  if __name__ == "__main__":
 
173
  run_test() # Run tests
174
  iface.launch()
 
22
  CREATE TABLE IF NOT EXISTS results (
23
  gematria_sum INTEGER,
24
  words TEXT,
25
+ translation TEXT,
26
  book INTEGER,
27
  title TEXT,
28
  chapter INTEGER,
 
33
  conn.commit()
34
  conn.close()
35
 
36
+ def insert_phrase_to_db(c, gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id):
37
  try:
38
+ logging.info(f"Inserting: {gematria_sum}, {phrase_candidate}, {translation}, {book_id}, {title}, {chapter_id}, {verse_id}")
39
  c.execute('''
40
+ INSERT INTO results (gematria_sum, words, translation, book, title, chapter, verse)
41
+ VALUES (?, ?, ?, ?, ?, ?, ?)
42
+ ''', (gematria_sum, phrase_candidate, translation, book_id, title, chapter_id, verse_id))
43
  except sqlite3.IntegrityError:
44
+ logging.info(f"Entry already exists: {gematria_sum}, {phrase_candidate}, {book_id}, {title}, {chapter_id}, {verse_id}")
45
 
46
  def populate_database(tanach_texts, max_phrase_length=1):
47
  conn = sqlite3.connect('gematria.db')
48
  c = conn.cursor()
49
  for book_id, text in enumerate(tanach_texts):
50
+ if 'text' not in text or not isinstance(text['text'], list):
51
+ logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
52
  continue
53
  title = text.get('title', 'Unknown')
54
  chapters = text['text']
55
  for chapter_id, chapter in enumerate(chapters):
56
  if not isinstance(chapter, list):
57
+ logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
58
  continue
59
+ for verse_id, verse in enumerate(chapter):
60
+ verse_text = flatten_text(verse)
61
+ verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
62
+ verse_text = re.sub(r" +", " ", verse_text)
63
+ words = verse_text.split()
64
+ max_length = min(max_phrase_length, len(words))
65
+ for length in range(1, max_length + 1):
66
+ for start in range(len(words) - length + 1):
67
+ phrase_candidate = " ".join(words[start:start + length])
68
+ gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
69
+ insert_phrase_to_db(c, gematria_sum, phrase_candidate, None, book_id + 1, title, chapter_id + 1, verse_id + 1) # No translation initially
70
  conn.commit()
71
  conn.close()
72
 
73
+ def get_translation_from_db(c, phrase, book, chapter, verse):
 
 
74
  c.execute('''
75
+ SELECT translation FROM results
76
+ WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
77
+ ''', (phrase, book, chapter, verse))
78
+ result = c.fetchone()
79
+ return result[0] if result else None
80
+
81
+ def translate_and_store(conn, phrase, book, chapter, verse):
82
+ translator = GoogleTranslator(source='iw', target='en') # Explicitly set source to Hebrew
83
+ c = conn.cursor()
84
+ try:
85
+ translation = translator.translate(phrase)
86
+ logging.info(f"Translated phrase: {translation}")
87
+ c.execute('''
88
+ UPDATE results
89
+ SET translation = ?
90
+ WHERE words = ? AND book = ? AND chapter = ? AND verse = ?
91
+ ''', (translation, phrase, book, chapter, verse))
92
+ conn.commit()
93
+ return translation
94
+ except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
95
+ exceptions.ServerException, exceptions.RequestError) as e:
96
+ logging.error(f"Error translating phrase '{phrase}': {e}")
97
+ return "[Translation Error]"
 
98
 
99
  def gematria_search_interface(phrase):
100
  debug_output = []
 
109
  phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
110
  debug_callback(f"Debug: Gematria of the search phrase '{phrase}' is {phrase_gematria}")
111
 
112
+ conn = sqlite3.connect('gematria.db')
113
+ c = conn.cursor()
114
+ matching_phrases = search_gematria_in_db(c, phrase_gematria)
115
 
116
  if not matching_phrases:
117
+ conn.close()
118
  return "No matching phrases found.", "\n".join(debug_output)
119
 
120
  # Sort matching phrases by book, chapter, and verse
121
  matching_phrases.sort(key=lambda x: (x[1], x[3], x[4]))
122
 
 
 
 
 
 
123
  result = "Matching phrases:\n"
124
+ for match in matching_phrases:
125
+ if len(match) != 6: # Adjusted length for added translation
126
+ debug_callback(f"Error: Expected tuple of length 6, but got {len(match)}: {match}")
127
  continue
128
+ words, book, title, chapter, verse, translation = match
129
+ if not translation: # Check if translation exists
130
+ translation = translate_and_store(conn, words, book, chapter, verse)
131
+ result += f"Book: {title} ({book})\nChapter: {chapter}, Verse: {verse}\nPhrase: {words}\nTranslation: {translation}\n\n"
132
+
133
+ conn.close()
134
  return result, "\n".join(debug_output)
135
 
136
+ def search_gematria_in_db(c, gematria_sum):
137
+ c.execute('''
138
+ SELECT words, book, title, chapter, verse, translation FROM results WHERE gematria_sum = ?
139
+ ''', (gematria_sum,))
140
+ results = c.fetchall()
141
+ logging.info(f"Search results: {results}")
142
+ return results
143
+
144
  def run_test():
145
  debug_output = []
146
+ test_phrase = "אחר ואתבנימין ואני"
147
  expected_gematria = 1495
148
 
149
  def debug_callback(message):
150
  debug_output.append(message)
151
  logging.info(message)
152
 
 
 
 
 
 
 
 
 
153
  # Load the test JSON contents for 01.json
154
+ test_texts_00 = process_json_files(0, 0)
155
  test_texts_01 = process_json_files(1, 1)
156
+ populate_database(test_texts_00, max_phrase_length=22) # Populate the database from book 0 with phrases up to 22 words
157
+ populate_database(test_texts_01, max_phrase_length=3) # Populate the database from book 1 with phrases up to 3 words
158
+ conn = sqlite3.connect('gematria.db')
159
+ c = conn.cursor()
160
+ matching_phrases_01 = search_gematria_in_db(c, expected_gematria)
161
+ conn.close()
162
+ #assert matching_phrases_01[0][0] == test_phrase, f"Found phrase does not match: {matching_phrases_01[0][0]}"
163
  print("Test successful: The phrase was correctly found and the gematria matches in 01.json.")
164
  print("\n".join(debug_output))
165
 
166
+ test_texts = process_json_files(1, 39)
167
+ populate_database(test_texts, max_phrase_length=2)
168
+
169
  iface = gr.Interface(
170
  fn=gematria_search_interface,
171
  inputs=gr.Textbox(label="Enter phrase"),
 
173
  title="Gematria Search in Tanach",
174
  description="Search for phrases in Tanach that have the same gematria value as the entered phrase.",
175
  live=False, # Disable live update
176
+ allow_flagging="never" # Disable flagging
177
  )
178
 
179
  if __name__ == "__main__":
180
+ initialize_database()
181
  run_test() # Run tests
182
  iface.launch()
gematria.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a74da41e964fe494ca3378faaa3be77929f1b0a680c312cee73106b96fb31055
3
- size 2891776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42149cb9e6ef917c121a5f8cb3d44ab03cfd9770b2e7818715b326f12b5bb65e
3
+ size 44187648