import json
import os
import logging
import sqlite3
import re
from contextlib import closing
from typing import Dict, List, Any, Optional, Tuple

from gematria import calculate_gematria, strip_diacritics
from deep_translator import GoogleTranslator

logger = logging.getLogger(__name__)

# Matches "[...]" and "(...)" spans (non-greedy, single line) that are stripped
# from the text before an ELS search when strip_in_braces is enabled.
_BRACKETED_TEXT_RE = re.compile(r'\[.*?\]|\(.*?\)')


def process_bible_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Bible JSON files and returns a dictionary mapping book IDs to their data.

    Each book is read from "texts/bible/<id>.json". Files that are missing,
    are not valid JSON, or fail for any other reason are logged as warnings
    and left out of the result; files whose JSON content is empty are skipped
    silently.

    Args:
        start: The starting book ID (inclusive).
        end: The ending book ID (inclusive).

    Returns:
        A dictionary where keys are book IDs and values are dictionaries
        containing 'title' and 'text' fields.
    """
    base_path = "texts/bible"
    results: Dict[int, Dict[str, Any]] = {}

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
            if data:
                # Store book ID as key and the extracted title/verses as value
                results[i] = {
                    "title": data.get("title", "No title"),
                    "text": data.get("text", []),
                }
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except Exception as e:
            # Best effort: one unreadable book must not abort the whole range.
            logger.warning(f"Error processing {file_name}: {e}")

    return results


def _prepare_els_text(chapters, strip_spaces, strip_in_braces, strip_diacritics_value) -> str:
    """Flattens a book's chapters into one lower-cased string and applies the requested clean-ups."""
    # Every non-empty verse is followed by a single space, as one linear join.
    text = "".join(verse + " " for chapter in chapters for verse in chapter if verse).lower()
    if strip_in_braces:
        text = _BRACKETED_TEXT_RE.sub('', text)
    if strip_diacritics_value:
        text = strip_diacritics(text)
    if strip_spaces:
        text = text.replace(" ", "")
    return text


def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True,
                       strip_in_braces=True, strip_diacritics_value=True, translate=False):
    """
    Processes Bible JSON files and performs ELS (Equidistant Letter Sequence) search.

    For every book and every non-zero round, every (round * step)-th character
    of the cleaned book text is collected: starting at the first character and
    moving forward for positive rounds, starting at the last character and
    moving backward for negative rounds.

    Parameters:
    - start (int): Start number of the Bible book.
    - end (int): End number of the Bible book.
    - step (int): Step size for character selection. Must be non-zero.
    - rounds (str): Comma-separated list of round numbers (can include negative
      values; a round of 0 is skipped). A plain int is accepted as well.
    - length (int): Maximum length of the result text (0 means no limit).
    - tlang (str): Target language for translation.
    - strip_spaces (bool): Whether to remove spaces from the text.
    - strip_in_braces (bool): Whether to remove text within brackets or parentheses.
    - strip_diacritics_value (bool): Whether to remove diacritics from the text.
    - translate (bool): Whether to translate the result text (only done when
      tlang is not "en").

    Returns:
    - list: A list of dictionaries containing processed data or error messages.
      None is returned when nothing at all was produced (for example when every
      requested round is 0); this is kept for backward compatibility.
    """
    logger.debug(f"Processing Bible files {start}-{end} with step {step}, rounds {rounds}")
    results = []

    try:
        bible_data = process_bible_files(start, end)
        if not bible_data:
            return [{"error": f"No Bible data found for books {start}-{end}"}]

        # str() lets callers pass a single round as an int.
        rounds_list = [int(r.strip()) for r in str(rounds).split(",")]

        if step == 0:
            # A zero step would give a zero stride, which cannot select characters.
            return [{"error": "Step must be non-zero for an ELS search"}]

        for book_id, book_info in bible_data.items():
            book_title = book_info.get("title", "Unknown")
            chapters = book_info.get("text", [])

            if not chapters:
                results.append({"error": f"No text found for book {book_title} (ID: {book_id})"})
                continue

            # Cleaning does not depend on the round, so it is done once per book.
            processed_text = _prepare_els_text(
                chapters, strip_spaces, strip_in_braces, strip_diacritics_value
            )

            for round_num in rounds_list:
                if round_num == 0:
                    continue

                stride = abs(round_num * step)
                if round_num > 0:
                    # Forward ELS: first character, then every stride-th one.
                    result_text = processed_text[::stride]
                else:
                    # Backward ELS: last character, then every stride-th one towards the start.
                    result_text = processed_text[::-stride]

                # Truncate result if length is specified
                if length > 0:
                    result_text = result_text[:length]

                # Translate if requested
                translated_text = ""
                if result_text and translate and tlang != "en":
                    try:
                        translator = GoogleTranslator(source='auto', target=tlang)
                        translated_text = translator.translate(result_text)
                    except Exception as e:
                        # Translation is optional; report the failure inside the result.
                        logger.warning(f"Translation error: {e}")
                        translated_text = f"Translation error: {str(e)}"

                results.append({
                    "book_id": book_id,
                    "book_title": book_title,
                    "step": step,
                    "round": round_num,
                    "result_text": result_text,
                    "translated_text": translated_text,
                    "gematria": calculate_gematria(result_text)
                })

    except Exception as e:
        # Top-level boundary: callers receive the failure as an error entry.
        logger.error(f"Error processing Bible files: {e}", exc_info=True)
        results.append({"error": f"Error processing Bible files: {str(e)}"})

    return results if results else None


# This function is not needed anymore as we're using get_first_els_result_matthew from app.py
# Keeping the definition for compatibility but marking it as deprecated
def get_first_els_result_john(gematria_sum, tlang="en"):
    """
    DEPRECATED: Use get_first_els_result_matthew instead.

    Logs a deprecation warning and forwards both arguments unchanged to
    app.get_first_els_result_matthew, returning whatever that call returns.
    """
    logger.warning("get_first_els_result_john is deprecated, use get_first_els_result_matthew instead")
    # Imported lazily, as in the original; only the name that is actually used is imported.
    from app import get_first_els_result_matthew
    return get_first_els_result_matthew(gematria_sum, tlang)


def create_bible_display_iframe(book_title, book_id, chapter=None, verse=None):
    """
    Creates an iframe HTML string for BibleGateway.

    Args:
        book_title: Book name used as the BibleGateway search term (URL-encoded here).
        book_id: Book ID; only used for debug logging.
        chapter: Optional chapter appended to the search term.
        verse: Optional verse; only used when a chapter is given as well.

    Returns:
        An HTML <iframe> element whose src is the BibleGateway passage URL
        (CJB version).
    """
    from html import escape
    from urllib.parse import quote_plus

    logger.debug(f"Creating Bible iframe for {book_title}, book_id: {book_id}, chapter: {chapter}, verse: {verse}")

    encoded_book_title = quote_plus(book_title)
    chapter_verse = ""
    if chapter is not None:
        chapter_verse = f"+{chapter}"
        # A verse reference is only meaningful relative to a chapter.
        if verse is not None:
            chapter_verse += f":{verse}"

    url = f"https://www.biblegateway.com/passage/?search={encoded_book_title}{chapter_verse}&version=CJB"
    # The original returned an empty string and never used the URL. The URL is
    # HTML-escaped because it is placed inside a double-quoted attribute.
    # NOTE(review): width/height are assumed defaults - confirm against the UI.
    iframe = f'<iframe src="{escape(url, quote=True)}" width="800" height="600"></iframe>'
    return iframe


def _create_results_schema(conn) -> None:
    """Creates the results/processed_books tables and the gematria index if they do not exist."""
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            phrase_length INTEGER,
            word_position TEXT,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
        )
    ''')
    cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_gematria
        ON results (gematria_sum)
    ''')
    # Tracks which books were processed and up to which phrase length.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
    ''')
    conn.commit()


def _count_words(chapters) -> int:
    """Counts the whitespace-separated words of all non-empty verses of a book."""
    return sum(len(verse.split()) for chapter in chapters for verse in chapter if verse)


def _build_phrase_rows(chapters, book_title, max_phrase_length, word_offset) -> Tuple[List[tuple], int]:
    """Builds the result rows for one book and returns them with the advanced word offset."""
    rows = []
    for chapter_idx, chapter in enumerate(chapters, 1):
        for verse_idx, verse_text in enumerate(chapter, 1):
            if not verse_text:
                continue

            words = verse_text.split()
            for phrase_len in range(1, max_phrase_length + 1):
                for first in range(len(words) - phrase_len + 1):
                    phrase = " ".join(words[first:first + phrase_len])
                    cleaned_phrase = strip_diacritics(phrase)
                    gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                    # 1-based, inclusive range of word positions counted from the first book.
                    word_position_range = f"{word_offset + first + 1}-{word_offset + first + phrase_len}"
                    rows.append(
                        (gematria_sum, cleaned_phrase, "", book_title,
                         chapter_idx, verse_idx, phrase_len, word_position_range)
                    )

            # Advance the running offset once the whole verse is handled.
            word_offset += len(words)
    return rows, word_offset


def initialize_bible_database(db_file: str = 'bible.db', max_phrase_length: int = 1) -> Dict[int, str]:
    """
    Initializes the Bible database with verse texts.

    Processes the Bible JSON files of books 40-66 and stores every phrase of
    1..max_phrase_length consecutive words of each verse in the results table,
    together with its gematria value and its word position. A book already
    recorded in processed_books with an equal or larger max_phrase_length is
    skipped.

    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length to process

    Returns:
        A dictionary mapping each book ID for which data was found to its title
        (skipped books included).
    """
    from tqdm import tqdm  # Imported lazily; only needed for the progress bar

    logger.info(f"Initializing Bible database: {db_file}")

    # Process Bible files from books 40-66 (New Testament)
    book_start = 40
    book_end = 66

    # Running word counter shared by all books, used for word positions
    total_word_count = 0
    book_names: Dict[int, str] = {}

    # sqlite3's own context manager only wraps a transaction and never closes
    # the connection, so closing() is used to release it reliably.
    with closing(sqlite3.connect(db_file)) as conn:
        _create_results_schema(conn)

        logger.info(f"Processing Bible books {book_start}-{book_end}")
        cursor = conn.cursor()

        for book_id in tqdm(range(book_start, book_end + 1), desc="Processing Bible Books"):
            book_data = process_bible_files(book_id, book_id)
            if book_id not in book_data:
                logger.warning(f"No data found for book ID {book_id}")
                continue

            book_info = book_data[book_id]
            book_title = book_info['title']
            chapters = book_info['text']
            book_names[book_id] = book_title

            # Check if this book has already been processed
            cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_title,))
            result = cursor.fetchone()
            if result and result[0] >= max_phrase_length:
                logger.info(f"Skipping book {book_title}: Already processed with max_phrase_length {result[0]}")
                # Still advance the counter so that later books get the same
                # word positions as in a run where nothing is skipped.
                total_word_count += _count_words(chapters)
                continue

            phrases_to_insert, total_word_count = _build_phrase_rows(
                chapters, book_title, max_phrase_length, total_word_count
            )
            if not phrases_to_insert:
                continue

            try:
                cursor.executemany('''
                    INSERT OR REPLACE INTO results
                    (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', phrases_to_insert)

                # Update the processed_books table
                cursor.execute('''
                    INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                    VALUES (?, ?)
                ''', (book_title, max_phrase_length))

                conn.commit()
                logger.info(f"Processed book {book_title}: inserted {len(phrases_to_insert)} phrases")
            except sqlite3.Error as e:
                # Drop the partial batch so it is not committed together with the next book.
                conn.rollback()
                logger.error(f"Database error processing {book_title}: {e}")

    logger.info(f"Bible database initialization completed. Processed {len(book_names)} books.")
    return book_names


def find_shortest_bible_match(gematria_sum: int, db_file: str = 'bible.db',
                              book: str = 'Revelation') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest stored phrase of one book that matches the given gematria sum.

    "Shortest" means the smallest character length of the stored words.

    Args:
        gematria_sum: The gematria sum to match
        db_file: The SQLite database file to search in
        book: Title of the book to search, as stored in the results table.
            Defaults to 'Revelation', the book the original query was fixed to.

    Returns:
        A dictionary with the matching verse information ('words', 'book',
        'chapter', 'verse', 'phrase_length', 'word_position'), or None if no
        match is found or an error occurs (errors are logged).
    """
    logger.debug(f"Finding shortest Bible match for gematria sum: {gematria_sum} in {db_file}")
    try:
        # closing() releases the connection; sqlite3's own context manager would not.
        with closing(sqlite3.connect(db_file)) as conn:
            row = conn.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book = ?
                ORDER BY LENGTH(words) ASC
                LIMIT 1
            ''', (gematria_sum, book)).fetchone()

        if row is None:
            logger.debug(f"No matching verse found in {book} for gematria sum: {gematria_sum}")
            return None

        logger.debug(f"Found Bible match: {row}")
        words, matched_book, chapter, verse, phrase_length, word_position = row
        return {
            "words": words,
            "book": matched_book,
            "chapter": chapter,
            "verse": verse,
            "phrase_length": phrase_length,
            "word_position": word_position
        }
    except sqlite3.Error as e:
        logger.error(f"Database error when finding Bible match: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected error when finding Bible match: {e}")
        return None