# daily_psalm/bible.py
import json
import logging
import re
import sqlite3
from typing import Any, Dict, List, Optional

from deep_translator import GoogleTranslator

from gematria import calculate_gematria, strip_diacritics

logger = logging.getLogger(__name__)


def process_bible_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Bible JSON files and returns a dictionary mapping book IDs to their data.

    Args:
        start: The starting book ID (inclusive).
        end: The ending book ID (inclusive).

    Returns:
        A dictionary where keys are book IDs and values are dictionaries
        containing 'title' and 'text' fields.
    """
    base_path = "texts/bible"
    results = {}
    for i in range(start, end + 1):
        file_name = f"{base_path}/{i}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
                if data:
                    # Extract title and verses
                    title = data.get("title", "No title")
                    text = data.get("text", [])
                    # Store book ID as key and book data as value
                    results[i] = {"title": title, "text": text}
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except Exception as e:
            logger.warning(f"Error processing {file_name}: {e}")
    return results
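

# Illustrative usage sketch (not part of the original module): loads a single
# book and prints its title and chapter count. Assumes texts/bible/43.json
# exists and follows the {"title": ..., "text": [[verse, ...], ...]} layout
# that process_bible_files() expects.
def _demo_process_bible_files() -> None:
    books = process_bible_files(43, 43)
    for book_id, info in books.items():
        print(f"{book_id}: {info['title']} ({len(info['text'])} chapters)")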


def process_json_files(start: int, end: int, step: int, rounds: str = "1", length: int = 0,
                       tlang: str = "en", strip_spaces: bool = True, strip_in_braces: bool = True,
                       strip_diacritics_value: bool = True, translate: bool = False) -> Optional[List[Dict[str, Any]]]:
    """
    Processes Bible JSON files and performs an ELS (Equidistant Letter Sequence) search.

    Parameters:
    - start (int): Start number of the Bible book.
    - end (int): End number of the Bible book.
    - step (int): Step size for character selection.
    - rounds (str): Comma-separated list of round numbers (can include negative values).
    - length (int): Maximum length of the result text (0 means no limit).
    - tlang (str): Target language for translation.
    - strip_spaces (bool): Whether to remove spaces from the text.
    - strip_in_braces (bool): Whether to remove text within braces.
    - strip_diacritics_value (bool): Whether to remove diacritics from the text.
    - translate (bool): Whether to translate the result text.

    Returns:
    - list: A list of dictionaries containing processed data or error messages,
      or None if no results were produced.
    """
    logger.debug(f"Processing Bible files {start}-{end} with step {step}, rounds {rounds}")
    results = []
    try:
        bible_data = process_bible_files(start, end)
        if not bible_data:
            return [{"error": f"No Bible data found for books {start}-{end}"}]
        rounds_list = [int(r.strip()) for r in rounds.split(",")]
        for book_id, book_info in bible_data.items():
            book_title = book_info.get("title", "Unknown")
            chapters = book_info.get("text", [])
            if not chapters:
                results.append({"error": f"No text found for book {book_title} (ID: {book_id})"})
                continue
            # Flatten the chapter/verse structure into a single string
            flattened_text = ""
            for chapter in chapters:
                for verse in chapter:
                    if verse:
                        flattened_text += verse + " "
            # Clean the text based on parameters
            processed_text = flattened_text.lower()
            if strip_in_braces:
                # Remove content within brackets or parentheses
                processed_text = re.sub(r'\[.*?\]|\(.*?\)', '', processed_text)
            if strip_diacritics_value:
                processed_text = strip_diacritics(processed_text)
            if strip_spaces:
                processed_text = processed_text.replace(" ", "")
            # Perform the ELS search for each round
            for round_num in rounds_list:
                if round_num == 0:
                    continue
                direction = 1 if round_num > 0 else -1
                abs_step = abs(round_num * step)
                if direction > 0:
                    # Forward ELS: every abs_step-th character from the start
                    result_chars = [processed_text[i] for i in range(0, len(processed_text), abs_step)]
                else:
                    # Backward ELS: every abs_step-th character from the end
                    result_chars = [processed_text[i] for i in range(len(processed_text) - 1, -1, -abs_step)]
                result_text = "".join(result_chars)
                # Truncate result if a maximum length is specified
                if length > 0 and len(result_text) > length:
                    result_text = result_text[:length]
                # Translate if requested
                translated_text = ""
                if result_text and translate and tlang != "en":
                    try:
                        translator = GoogleTranslator(source='auto', target=tlang)
                        translated_text = translator.translate(result_text)
                    except Exception as e:
                        logger.warning(f"Translation error: {e}")
                        translated_text = f"Translation error: {str(e)}"
                # Add the result to the results list
                results.append({
                    "book_id": book_id,
                    "book_title": book_title,
                    "step": step,
                    "round": round_num,
                    "result_text": result_text,
                    "translated_text": translated_text,
                    "gematria": calculate_gematria(result_text)
                })
    except Exception as e:
        logger.error(f"Error processing Bible files: {e}", exc_info=True)
        results.append({"error": f"Error processing Bible files: {str(e)}"})
    return results if results else None
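

# Illustrative usage sketch (not part of the original module): runs a forward
# and a backward ELS pass over book 43 with step 7 and prints the raw result
# strings. The step value and rounds string are arbitrary example inputs.
def _demo_process_json_files() -> None:
    for entry in process_json_files(43, 43, step=7, rounds="1,-1", length=24) or []:
        if "error" in entry:
            print(entry["error"])
        else:
            print(entry["book_title"], entry["round"], entry["result_text"])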


# This function is not needed anymore as we're using get_first_els_result_matthew from app.py.
# Keeping the definition for compatibility but marking it as deprecated.
def get_first_els_result_john(gematria_sum, tlang="en"):
    """
    DEPRECATED: Use get_first_els_result_matthew instead.

    Gets the first ELS result from John's Gospel (book 43) using the specified step size.
    """
    logger.warning("get_first_els_result_john is deprecated, use get_first_els_result_matthew instead")
    from app import get_first_els_result_matthew
    return get_first_els_result_matthew(gematria_sum, tlang)


def create_bible_display_iframe(book_title, book_id, chapter=None, verse=None):
    """Creates an iframe HTML string for BibleGateway."""
    from urllib.parse import quote_plus
    logger.debug(f"Creating Bible iframe for {book_title}, book_id: {book_id}, chapter: {chapter}, verse: {verse}")
    encoded_book_title = quote_plus(book_title)
    chapter_verse = ""
    if chapter is not None:
        chapter_verse = f"+{chapter}"
        if verse is not None:
            chapter_verse += f":{verse}"
    url = f"https://www.biblegateway.com/passage/?search={encoded_book_title}{chapter_verse}&version=CJB"
    iframe = f'<iframe src="{url}" width="800" height="600"></iframe>'
    return iframe
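

# Illustrative usage sketch (not part of the original module): builds an embed
# for chapter 3, verse 16 of a book. With these inputs the search parameter
# becomes "John+3:16" and the version is the CJB hard-coded above.
def _demo_create_bible_display_iframe() -> None:
    print(create_bible_display_iframe("John", 43, chapter=3, verse=16))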


def initialize_bible_database(db_file: str = 'bible.db', max_phrase_length: int = 1):
    """
    Initializes the Bible database with verse texts.

    This function processes all Bible JSON files and adds their gematria values to the database.

    Args:
        db_file: The SQLite database file to use.
        max_phrase_length: Maximum phrase length (in words) to process.

    Returns:
        A dictionary mapping processed book IDs to book titles.
    """
    from tqdm import tqdm  # Import tqdm for progress bars
    logger.info(f"Initializing Bible database: {db_file}")
    # Create the database if it doesn't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create results table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book TEXT,
                chapter INTEGER,
                verse INTEGER,
                phrase_length INTEGER,
                word_position TEXT,
                PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
            )
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_results_gematria
            ON results (gematria_sum)
        ''')
        # Create processed_books table to track processing
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS processed_books (
                book TEXT PRIMARY KEY,
                max_phrase_length INTEGER
            )
        ''')
        conn.commit()
    # Process Bible files from books 40-66 (New Testament)
    book_start = 40
    book_end = 66
    logger.info(f"Processing Bible books {book_start}-{book_end}")
    # Global counter for word position tracking
    total_word_count = 0
    book_names = {}
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Process each book
        for book_id in tqdm(range(book_start, book_end + 1), desc="Processing Bible Books"):
            # Load book data
            book_data = process_bible_files(book_id, book_id)
            if book_id in book_data:
                book_info = book_data[book_id]
                book_title = book_info['title']
                book_names[book_id] = book_title
                # Check if this book has already been processed
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_title,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping book {book_title}: Already processed with max_phrase_length {result[0]}")
                    continue
                chapters = book_info['text']
                phrases_to_insert = []
                for chapter_idx, chapter in enumerate(chapters, 1):
                    for verse_idx, verse_text in enumerate(chapter, 1):
                        if not verse_text:
                            continue
                        # Split verse into words
                        words = verse_text.split()
                        # Process phrases of different lengths
                        for length in range(1, max_phrase_length + 1):
                            for start in range(len(words) - length + 1):
                                phrase = " ".join(words[start:start + length])
                                cleaned_phrase = strip_diacritics(phrase)
                                gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                                # Calculate word position range
                                word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                                # Add to batch insert list
                                phrases_to_insert.append(
                                    (gematria_sum, cleaned_phrase, "", book_title, chapter_idx, verse_idx, length, word_position_range)
                                )
                        # Update total word count after processing each verse
                        total_word_count += len(words)
                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                            INSERT OR REPLACE INTO results
                            (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        # Update the processed_books table
                        cursor.execute('''
                            INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                            VALUES (?, ?)
                        ''', (book_title, max_phrase_length))
                        conn.commit()
                        logger.info(f"Processed book {book_title}: inserted {len(phrases_to_insert)} phrases")
                    except sqlite3.Error as e:
                        logger.error(f"Database error processing {book_title}: {e}")
            else:
                logger.warning(f"No data found for book ID {book_id}")
    logger.info(f"Bible database initialization completed. Processed {len(book_names)} books.")
    return book_names
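

# Illustrative usage sketch (not part of the original module): builds (or extends)
# a local bible.db with one- and two-word phrases from books 40-66. Re-running it
# skips books already processed with an equal or larger max_phrase_length.
def _demo_initialize_bible_database() -> None:
    processed = initialize_bible_database(db_file="bible.db", max_phrase_length=2)
    print(f"Indexed {len(processed)} books")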


def find_shortest_bible_match(gematria_sum: int, db_file: str = 'bible.db') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest stored phrase from Revelation that matches the given gematria sum.

    Args:
        gematria_sum: The gematria sum to match.
        db_file: The SQLite database file to search in.

    Returns:
        A dictionary with the matching phrase information, or None if no match is found.
    """
    logger.debug(f"Finding shortest Bible match for gematria sum: {gematria_sum} in {db_file}")
    try:
        with sqlite3.connect(db_file) as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book = 'Revelation'
                ORDER BY LENGTH(words) ASC
                LIMIT 1
            ''', (gematria_sum,))
            result = cursor.fetchone()
            if result:
                logger.debug(f"Found Bible match: {result}")
                return {
                    "words": result[0],
                    "book": result[1],
                    "chapter": result[2],
                    "verse": result[3],
                    "phrase_length": result[4],
                    "word_position": result[5]
                }
            else:
                logger.debug(f"No matching phrase found in Revelation for gematria sum: {gematria_sum}")
                return None
    except sqlite3.Error as e:
        logger.error(f"Database error when finding Bible match: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected error when finding Bible match: {e}")
        return None
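

# Illustrative usage sketch (not part of the original module): looks up the
# shortest Revelation phrase whose gematria equals an example sum in a
# previously initialized bible.db (see initialize_bible_database above).
def _demo_find_shortest_bible_match() -> None:
    match = find_shortest_bible_match(913, db_file="bible.db")
    if match:
        print(f"{match['book']} {match['chapter']}:{match['verse']} - {match['words']}")
    else:
        print("No match found for this gematria sum")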