# Spaces: Running
import json
import logging
import os
import re
import sqlite3
from contextlib import closing
from typing import Any, Dict, List, Optional

from deep_translator import GoogleTranslator

from gematria import calculate_gematria, strip_diacritics
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
def process_bible_files(start: int, end: int, base_path: str = "texts/bible") -> Dict[int, Dict[str, Any]]:
    """
    Load Bible book JSON files and map each book ID to its title and text.

    Each book is read from ``{base_path}/{book_id}.json``. A book is skipped
    when its file is missing, cannot be read or decoded, is not valid JSON,
    or does not contain a JSON object; a warning is logged in each of those
    cases. An empty JSON document is skipped silently.

    Args:
        start: The starting book ID (inclusive).
        end: The ending book ID (inclusive).
        base_path: Directory that holds the ``<book_id>.json`` files.

    Returns:
        A dictionary where keys are book IDs and values are dictionaries
        containing 'title' (default "No title") and 'text' (default []).
        Empty when ``start > end`` or when no book could be loaded.
    """
    results: Dict[int, Dict[str, Any]] = {}
    for book_id in range(start, end + 1):
        file_name = f"{base_path}/{book_id}.json"

        # Keep the try body limited to the I/O and parsing that can fail.
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
            continue
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
            continue
        except (OSError, UnicodeDecodeError) as e:
            logger.warning(f"Error processing {file_name}: {e}")
            continue

        if not data:
            continue
        if not isinstance(data, dict):
            # A JSON array or scalar has no 'title'/'text' fields to extract.
            logger.warning(
                f"Error processing {file_name}: expected a JSON object, got {type(data).__name__}"
            )
            continue

        # Store book ID as key and book data as value
        results[book_id] = {
            "title": data.get("title", "No title"),
            "text": data.get("text", []),
        }
    return results
def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True, strip_in_braces=True,
                       strip_diacritics_value=True, translate=False):
    """
    Processes Bible JSON files and performs ELS (Equidistant Letter Sequence) search.

    For every loaded book the verses are flattened into one lowercase string,
    optionally cleaned, and then sampled every ``abs(round * step)`` characters
    for each requested round: from the first character forwards for a positive
    round, from the last character backwards for a negative round. Rounds equal
    to 0 are skipped.

    Parameters:
    - start (int): Start number of the Bible book.
    - end (int): End number of the Bible book.
    - step (int): Step size for character selection.
    - rounds (str | int): Comma-separated list of round numbers (can include negative values).
    - length (int): Maximum length of the result text (0 means no limit).
    - tlang (str): Target language for translation.
    - strip_spaces (bool): Whether to remove spaces from the text.
    - strip_in_braces (bool): Whether to remove text within [] or ().
    - strip_diacritics_value (bool): Whether to remove diacritics from the text.
    - translate (bool): Whether to translate the result text (skipped when tlang is "en").

    Returns:
    - list | None: A list of dictionaries containing processed data or error
      messages, or None when nothing at all was produced (for example when
      every round is 0). Any exception raised while processing (such as an
      unparsable ``rounds`` value or a zero effective step) is logged and
      reported as an ``{"error": ...}`` entry appended to the results
      gathered so far.
    """
    logger.debug(f"Processing Bible files {start}-{end} with step {step}, rounds {rounds}")
    results = []
    try:
        bible_data = process_bible_files(start, end)
        if not bible_data:
            return [{"error": f"No Bible data found for books {start}-{end}"}]

        # str() lets callers pass a single round as an int as well as a string.
        rounds_list = [int(r.strip()) for r in str(rounds).split(",")]

        for book_id, book_info in bible_data.items():
            book_title = book_info.get("title", "Unknown")
            chapters = book_info.get("text", [])
            if not chapters:
                results.append({"error": f"No text found for book {book_title} (ID: {book_id})"})
                continue

            # Flatten the text: every non-empty verse followed by one space.
            flattened_text = "".join(
                verse + " " for chapter in chapters for verse in chapter if verse
            )

            # Clean the text based on parameters
            processed_text = flattened_text.lower()
            if strip_in_braces:
                # Remove content within brackets or parentheses
                processed_text = re.sub(r'\[.*?\]|\(.*?\)', '', processed_text)
            if strip_diacritics_value:
                processed_text = strip_diacritics(processed_text)
            if strip_spaces:
                processed_text = processed_text.replace(" ", "")

            # Perform ELS search for each round
            for round_num in rounds_list:
                if round_num == 0:
                    continue
                abs_step = abs(round_num * step)
                # A positive stride walks forward from index 0, a negative one
                # walks backward from the last character. A zero stride raises
                # ValueError, which the outer handler reports as an error entry.
                stride = abs_step if round_num > 0 else -abs_step
                result_text = processed_text[::stride]

                # Truncate result if length is specified
                if length > 0 and len(result_text) > length:
                    result_text = result_text[:length]

                # Translate if requested
                translated_text = ""
                if result_text and translate and tlang != "en":
                    try:
                        translator = GoogleTranslator(source='auto', target=tlang)
                        translated_text = translator.translate(result_text)
                    except Exception as e:
                        # Translation is best-effort: surface the failure in
                        # the result instead of aborting the whole search.
                        logger.warning(f"Translation error: {e}")
                        translated_text = f"Translation error: {str(e)}"

                # Add result to results list
                results.append({
                    "book_id": book_id,
                    "book_title": book_title,
                    "step": step,
                    "round": round_num,
                    "result_text": result_text,
                    "translated_text": translated_text,
                    "gematria": calculate_gematria(result_text)
                })
    except Exception as e:
        logger.error(f"Error processing Bible files: {e}", exc_info=True)
        results.append({"error": f"Error processing Bible files: {str(e)}"})
    return results if results else None
# This function is no longer needed because get_first_els_result_matthew from app.py is used instead.
# The definition is kept for compatibility but is marked as deprecated.
def get_first_els_result_john(gematria_sum, tlang="en"):
    """
    DEPRECATED: Use get_first_els_result_matthew instead.

    Compatibility wrapper: logs a deprecation warning and returns whatever
    ``app.get_first_els_result_matthew(gematria_sum, tlang)`` returns.
    """
    logger.warning("get_first_els_result_john is deprecated, use get_first_els_result_matthew instead")
    # Imported at call time rather than at module level.
    # NOTE(review): presumably this avoids a circular import with app — confirm.
    from app import get_first_els_result_matthew
    return get_first_els_result_matthew(gematria_sum, tlang)
def create_bible_display_iframe(book_title, book_id, chapter=None, verse=None):
    """
    Creates an iframe HTML string for BibleGateway.

    Args:
        book_title: Book name used as the search term.
        book_id: Book number; only used in the debug log message.
        chapter: Optional chapter appended to the search as ``+<chapter>``.
        verse: Optional verse appended as ``:<verse>``; ignored when no
            chapter is given.

    Returns:
        An ``<iframe>`` element (800x600) pointing at the BibleGateway passage
        search for the reference, using the CJB version.
    """
    from urllib.parse import quote_plus

    logger.debug(f"Creating Bible iframe for {book_title}, book_id: {book_id}, chapter: {chapter}, verse: {verse}")

    # Every caller-supplied part is URL-quoted so it cannot break out of the
    # query string or the src attribute; integer chapters/verses are unchanged.
    reference = quote_plus(book_title)
    if chapter is not None:
        reference += f"+{quote_plus(str(chapter))}"
        if verse is not None:
            reference += f":{quote_plus(str(verse))}"

    url = f"https://www.biblegateway.com/passage/?search={reference}&version=CJB"
    return f'<iframe src="{url}" width="800" height="600"></iframe>'
def _create_bible_tables(cursor) -> None:
    """Create the results table, its gematria index and the processed_books table if missing."""
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            phrase_length INTEGER,
            word_position TEXT,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
        )
    ''')
    cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_gematria
        ON results (gematria_sum)
    ''')
    # Tracks which books were processed and up to which phrase length.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
    ''')


def _count_book_words(chapters) -> int:
    """Return the number of whitespace-separated words in all non-empty verses."""
    return sum(
        len(verse_text.split())
        for chapter in chapters
        for verse_text in chapter
        if verse_text
    )


def initialize_bible_database(db_file: str = 'bible.db', max_phrase_length: int = 1,
                              book_start: int = 40, book_end: int = 66):
    """
    Initializes the Bible database with verse texts.

    Loads each book in ``book_start``..``book_end`` via ``process_bible_files``,
    splits every verse into words and stores every phrase of 1 to
    ``max_phrase_length`` consecutive words (diacritics stripped) together with
    its gematria value, location and word position in the ``results`` table.
    A book already recorded in ``processed_books`` with an equal or larger
    phrase length is skipped.

    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length to process
        book_start: First book ID to process (inclusive); default 40.
        book_end: Last book ID to process (inclusive); default 66.

    Returns:
        A dictionary mapping each book ID whose file could be loaded (including
        books that were skipped as already processed) to its title.
    """
    from tqdm import tqdm  # Import tqdm for progress bars

    logger.info(f"Initializing Bible database: {db_file}")

    # Running word index across all books of this call; word_position values
    # are 1-based offsets into that sequence.
    total_word_count = 0
    book_names = {}

    # sqlite3's own context manager only commits/rolls back; closing() makes
    # sure the connection is actually released.
    with closing(sqlite3.connect(db_file)) as conn:
        cursor = conn.cursor()
        _create_bible_tables(cursor)
        conn.commit()

        logger.info(f"Processing Bible books {book_start}-{book_end}")

        for book_id in tqdm(range(book_start, book_end + 1), desc="Processing Bible Books"):
            book_data = process_bible_files(book_id, book_id)
            if book_id not in book_data:
                logger.warning(f"No data found for book ID {book_id}")
                continue

            book_info = book_data[book_id]
            book_title = book_info['title']
            chapters = book_info['text']
            book_names[book_id] = book_title

            # Check if this book has already been processed
            cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_title,))
            result = cursor.fetchone()
            if result and result[0] >= max_phrase_length:
                logger.info(f"Skipping book {book_title}: Already processed with max_phrase_length {result[0]}")
                # Still advance the counter so later books keep the same word
                # positions they would get in a full run.
                total_word_count += _count_book_words(chapters)
                continue

            phrases_to_insert = []
            for chapter_idx, chapter in enumerate(chapters, 1):
                for verse_idx, verse_text in enumerate(chapter, 1):
                    if not verse_text:
                        continue
                    words = verse_text.split()

                    # Process phrases of different lengths
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            # "first-last" word index of the phrase (1-based)
                            word_position_range = (
                                f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            )
                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", book_title,
                                 chapter_idx, verse_idx, length, word_position_range)
                            )

                    # Update total word count after processing each verse
                    total_word_count += len(words)

            if not phrases_to_insert:
                continue

            try:
                cursor.executemany('''
                    INSERT OR REPLACE INTO results
                    (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', phrases_to_insert)
                # Update the processed_books table
                cursor.execute('''
                    INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                    VALUES (?, ?)
                ''', (book_title, max_phrase_length))
                conn.commit()
                logger.info(f"Processed book {book_title}: inserted {len(phrases_to_insert)} phrases")
            except sqlite3.Error as e:
                # Drop the partial batch so a later commit cannot persist a
                # half-inserted book that is not marked as processed.
                conn.rollback()
                logger.error(f"Database error processing {book_title}: {e}")

    logger.info(f"Bible database initialization completed. Processed {len(book_names)} books.")
    return book_names
def find_shortest_bible_match(gematria_sum: int, db_file: str = 'bible.db',
                              book: str = 'Revelation') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest stored phrase of one book that matches the given gematria sum.

    "Shortest" means the smallest character length of the ``words`` column;
    which row wins among equally short phrases is not specified.

    Args:
        gematria_sum: The gematria sum to match
        db_file: The SQLite database file to search in
        book: Title of the book to search, as stored in the ``results`` table.
            Defaults to 'Revelation'.

    Returns:
        A dictionary with the keys 'words', 'book', 'chapter', 'verse',
        'phrase_length' and 'word_position', or None if no match is found or
        the lookup fails (failures are logged, not raised).
    """
    logger.debug(f"Finding shortest Bible match for gematria sum: {gematria_sum} in {db_file}")
    try:
        # closing() releases the connection; sqlite3's own context manager
        # would only end the transaction.
        with closing(sqlite3.connect(db_file)) as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book = ?
                ORDER BY LENGTH(words) ASC
                LIMIT 1
            ''', (gematria_sum, book))
            result = cursor.fetchone()
    except sqlite3.Error as e:
        logger.error(f"Database error when finding Bible match: {e}")
        return None
    except Exception as e:
        # Deliberately best-effort: callers get None instead of an exception.
        logger.error(f"Unexpected error when finding Bible match: {e}")
        return None

    if result is None:
        logger.debug(f"No matching verse found in {book} for gematria sum: {gematria_sum}")
        return None

    logger.debug(f"Found Bible match: {result}")
    columns = ("words", "book", "chapter", "verse", "phrase_length", "word_position")
    return dict(zip(columns, result))