# daily_psalm/bible.py
import json
import logging
import re
import sqlite3
from typing import Any, Dict, List, Optional

from deep_translator import GoogleTranslator

from gematria import calculate_gematria, strip_diacritics

logger = logging.getLogger(__name__)


def process_bible_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Bible JSON files and returns a dictionary mapping book IDs to their data.

    Args:
        start: The starting book ID (inclusive).
        end: The ending book ID (inclusive).

    Returns:
        A dictionary where keys are book IDs and values are dictionaries
        containing 'title' and 'text' fields.
    """
    base_path = "texts/bible"
    results = {}
    for i in range(start, end + 1):
        file_name = f"{base_path}/{i}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
                if data:
                    # Extract title and verses
                    title = data.get("title", "No title")
                    text = data.get("text", [])
                    # Store book ID as key and book data as value
                    results[i] = {"title": title, "text": text}
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except Exception as e:
            logger.warning(f"Error processing {file_name}: {e}")
    return results
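

# Illustrative usage sketch (not part of the original module): loads a single
# book and prints its title and chapter count. Assumes texts/bible/43.json
# exists and follows the {"title": ..., "text": [[verse, ...], ...]} layout
# that process_bible_files() expects.
def _demo_process_bible_files() -> None:
    books = process_bible_files(43, 43)
    for book_id, info in books.items():
        print(f"{book_id}: {info['title']} ({len(info['text'])} chapters)")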


def process_json_files(start: int, end: int, step: int, rounds: str = "1", length: int = 0,
                       tlang: str = "en", strip_spaces: bool = True, strip_in_braces: bool = True,
                       strip_diacritics_value: bool = True, translate: bool = False) -> Optional[List[Dict[str, Any]]]:
    """
    Processes Bible JSON files and performs an ELS (Equidistant Letter Sequence) search.

    Parameters:
    - start (int): Start number of the Bible book.
    - end (int): End number of the Bible book.
    - step (int): Step size for character selection.
    - rounds (str): Comma-separated list of round numbers (can include negative values).
    - length (int): Maximum length of the result text (0 means no limit).
    - tlang (str): Target language for translation.
    - strip_spaces (bool): Whether to remove spaces from the text.
    - strip_in_braces (bool): Whether to remove text within braces.
    - strip_diacritics_value (bool): Whether to remove diacritics from the text.
    - translate (bool): Whether to translate the result text.

    Returns:
    - list: A list of dictionaries containing processed data or error messages,
      or None if no results were produced.
    """
    logger.debug(f"Processing Bible files {start}-{end} with step {step}, rounds {rounds}")
    results = []
    try:
        bible_data = process_bible_files(start, end)
        if not bible_data:
            return [{"error": f"No Bible data found for books {start}-{end}"}]
        rounds_list = [int(r.strip()) for r in rounds.split(",")]
        for book_id, book_info in bible_data.items():
            book_title = book_info.get("title", "Unknown")
            chapters = book_info.get("text", [])
            if not chapters:
                results.append({"error": f"No text found for book {book_title} (ID: {book_id})"})
                continue
            # Flatten the chapter/verse structure into a single string
            flattened_text = ""
            for chapter in chapters:
                for verse in chapter:
                    if verse:
                        flattened_text += verse + " "
            # Clean the text based on parameters
            processed_text = flattened_text.lower()
            if strip_in_braces:
                # Remove content within brackets or parentheses
                processed_text = re.sub(r'\[.*?\]|\(.*?\)', '', processed_text)
            if strip_diacritics_value:
                processed_text = strip_diacritics(processed_text)
            if strip_spaces:
                processed_text = processed_text.replace(" ", "")
            # Perform the ELS search for each round
            for round_num in rounds_list:
                if round_num == 0:
                    continue
                direction = 1 if round_num > 0 else -1
                abs_step = abs(round_num * step)
                if direction > 0:
                    # Forward ELS: every abs_step-th character from the start
                    result_chars = [processed_text[i] for i in range(0, len(processed_text), abs_step)]
                else:
                    # Backward ELS: every abs_step-th character from the end
                    result_chars = [processed_text[i] for i in range(len(processed_text) - 1, -1, -abs_step)]
                result_text = "".join(result_chars)
                # Truncate result if a maximum length is specified
                if length > 0 and len(result_text) > length:
                    result_text = result_text[:length]
                # Translate if requested
                translated_text = ""
                if result_text and translate and tlang != "en":
                    try:
                        translator = GoogleTranslator(source='auto', target=tlang)
                        translated_text = translator.translate(result_text)
                    except Exception as e:
                        logger.warning(f"Translation error: {e}")
                        translated_text = f"Translation error: {str(e)}"
                # Add the result to the results list
                results.append({
                    "book_id": book_id,
                    "book_title": book_title,
                    "step": step,
                    "round": round_num,
                    "result_text": result_text,
                    "translated_text": translated_text,
                    "gematria": calculate_gematria(result_text)
                })
    except Exception as e:
        logger.error(f"Error processing Bible files: {e}", exc_info=True)
        results.append({"error": f"Error processing Bible files: {str(e)}"})
    return results if results else None
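

# Illustrative usage sketch (not part of the original module): runs a forward
# and a backward ELS pass over book 43 with step 7 and prints the raw result
# strings. The step value and rounds string are arbitrary example inputs.
def _demo_process_json_files() -> None:
    for entry in process_json_files(43, 43, step=7, rounds="1,-1", length=24) or []:
        if "error" in entry:
            print(entry["error"])
        else:
            print(entry["book_title"], entry["round"], entry["result_text"])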


# This function is not needed anymore as we're using get_first_els_result_matthew from app.py.
# Keeping the definition for compatibility but marking it as deprecated.
def get_first_els_result_john(gematria_sum, tlang="en"):
    """
    DEPRECATED: Use get_first_els_result_matthew instead.

    Gets the first ELS result from John's Gospel (book 43) using the specified step size.
    """
    logger.warning("get_first_els_result_john is deprecated, use get_first_els_result_matthew instead")
    from app import get_first_els_result_matthew
    return get_first_els_result_matthew(gematria_sum, tlang)


def create_bible_display_iframe(book_title, book_id, chapter=None, verse=None):
    """Creates an iframe HTML string for BibleGateway."""
    from urllib.parse import quote_plus
    logger.debug(f"Creating Bible iframe for {book_title}, book_id: {book_id}, chapter: {chapter}, verse: {verse}")
    encoded_book_title = quote_plus(book_title)
    chapter_verse = ""
    if chapter is not None:
        chapter_verse = f"+{chapter}"
        if verse is not None:
            chapter_verse += f":{verse}"
    url = f"https://www.biblegateway.com/passage/?search={encoded_book_title}{chapter_verse}&version=CJB"
    iframe = f'<iframe src="{url}" width="800" height="600"></iframe>'
    return iframe
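

# Illustrative usage sketch (not part of the original module): builds an embed
# for chapter 3, verse 16 of a book. With these inputs the search parameter
# becomes "John+3:16" and the version is the CJB hard-coded above.
def _demo_create_bible_display_iframe() -> None:
    print(create_bible_display_iframe("John", 43, chapter=3, verse=16))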


def initialize_bible_database(db_file: str = 'bible.db', max_phrase_length: int = 1):
    """
    Initializes the Bible database with verse texts.

    This function processes all Bible JSON files and adds their gematria values to the database.

    Args:
        db_file: The SQLite database file to use.
        max_phrase_length: Maximum phrase length (in words) to process.

    Returns:
        A dictionary mapping processed book IDs to book titles.
    """
    from tqdm import tqdm  # Import tqdm for progress bars
    logger.info(f"Initializing Bible database: {db_file}")
    # Create the database if it doesn't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create results table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book TEXT,
                chapter INTEGER,
                verse INTEGER,
                phrase_length INTEGER,
                word_position TEXT,
                PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
            )
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_results_gematria
            ON results (gematria_sum)
        ''')
        # Create processed_books table to track processing
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS processed_books (
                book TEXT PRIMARY KEY,
                max_phrase_length INTEGER
            )
        ''')
        conn.commit()
    # Process Bible files from books 40-66 (New Testament)
    book_start = 40
    book_end = 66
    logger.info(f"Processing Bible books {book_start}-{book_end}")
    # Global counter for word position tracking
    total_word_count = 0
    book_names = {}
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Process each book
        for book_id in tqdm(range(book_start, book_end + 1), desc="Processing Bible Books"):
            # Load book data
            book_data = process_bible_files(book_id, book_id)
            if book_id in book_data:
                book_info = book_data[book_id]
                book_title = book_info['title']
                book_names[book_id] = book_title
                # Check if this book has already been processed
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_title,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping book {book_title}: Already processed with max_phrase_length {result[0]}")
                    continue
                chapters = book_info['text']
                phrases_to_insert = []
                for chapter_idx, chapter in enumerate(chapters, 1):
                    for verse_idx, verse_text in enumerate(chapter, 1):
                        if not verse_text:
                            continue
                        # Split verse into words
                        words = verse_text.split()
                        # Process phrases of different lengths
                        for length in range(1, max_phrase_length + 1):
                            for start in range(len(words) - length + 1):
                                phrase = " ".join(words[start:start + length])
                                cleaned_phrase = strip_diacritics(phrase)
                                gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                                # Calculate word position range
                                word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                                # Add to batch insert list
                                phrases_to_insert.append(
                                    (gematria_sum, cleaned_phrase, "", book_title, chapter_idx, verse_idx, length, word_position_range)
                                )
                        # Update total word count after processing each verse
                        total_word_count += len(words)
                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                            INSERT OR REPLACE INTO results
                            (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        # Update the processed_books table
                        cursor.execute('''
                            INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                            VALUES (?, ?)
                        ''', (book_title, max_phrase_length))
                        conn.commit()
                        logger.info(f"Processed book {book_title}: inserted {len(phrases_to_insert)} phrases")
                    except sqlite3.Error as e:
                        logger.error(f"Database error processing {book_title}: {e}")
            else:
                logger.warning(f"No data found for book ID {book_id}")
    logger.info(f"Bible database initialization completed. Processed {len(book_names)} books.")
    return book_names
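

# Illustrative usage sketch (not part of the original module): builds (or extends)
# a local bible.db with one- and two-word phrases from books 40-66. Re-running it
# skips books already processed with an equal or larger max_phrase_length.
def _demo_initialize_bible_database() -> None:
    processed = initialize_bible_database(db_file="bible.db", max_phrase_length=2)
    print(f"Indexed {len(processed)} books")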


def find_shortest_bible_match(gematria_sum: int, db_file: str = 'bible.db') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest stored phrase from Revelation that matches the given gematria sum.

    Args:
        gematria_sum: The gematria sum to match.
        db_file: The SQLite database file to search in.

    Returns:
        A dictionary with the matching phrase information, or None if no match is found.
    """
    logger.debug(f"Finding shortest Bible match for gematria sum: {gematria_sum} in {db_file}")
    try:
        with sqlite3.connect(db_file) as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book = 'Revelation'
                ORDER BY LENGTH(words) ASC
                LIMIT 1
            ''', (gematria_sum,))
            result = cursor.fetchone()
            if result:
                logger.debug(f"Found Bible match: {result}")
                return {
                    "words": result[0],
                    "book": result[1],
                    "chapter": result[2],
                    "verse": result[3],
                    "phrase_length": result[4],
                    "word_position": result[5]
                }
            else:
                logger.debug(f"No matching phrase found in Revelation for gematria sum: {gematria_sum}")
                return None
    except sqlite3.Error as e:
        logger.error(f"Database error when finding Bible match: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected error when finding Bible match: {e}")
        return None
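

# Illustrative usage sketch (not part of the original module): looks up the
# shortest Revelation phrase whose gematria equals an example sum in a
# previously initialized bible.db (see initialize_bible_database above).
def _demo_find_shortest_bible_match() -> None:
    match = find_shortest_bible_match(913, db_file="bible.db")
    if match:
        print(f"{match['book']} {match['chapter']}:{match['verse']} - {match['words']}")
    else:
        print("No match found for this gematria sum")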