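"""Utilities for working with Quran texts: loading sura JSON files,
looking up gematria matches in the abjad database, generating display
iframes, and running ELS (equidistant letter sequence) searches."""
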
import json
import os
import logging
import sqlite3
from typing import Dict, List, Any, Optional

logger = logging.getLogger(__name__)


def process_quran_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Quran JSON files and returns a dictionary mapping sura IDs to their data.

    Args:
        start: The starting sura ID (inclusive).
        end: The ending sura ID (inclusive).

    Returns:
        A dictionary where keys are sura IDs and values are dictionaries
        containing 'name' and 'text' fields.
    """
    base_path = "texts/quran"
    results = {}
    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:03d}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
                if data:
                    # Extract name and verses
                    name = data.get("name", "No title")
                    verses = data.get("verse", {})
                    # Sort verse keys numerically; a plain lexicographic sort would
                    # put "10" before "2" and scramble the verse order
                    text = [verses[key] for key in sorted(
                        verses.keys(),
                        key=lambda k: int(''.join(filter(str.isdigit, k)) or 0))]
                    # Store sura ID as key and sura data as value
                    results[i] = {"name": name, "text": text}
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except KeyError as e:
            logger.warning(f"Expected key 'verse' is missing in {file_name}: {e}")
    return results


def find_shortest_sura_match(gematria_sum: int, db_file: str = 'abjad.db') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest Quran sura verse in abjad.db.

    Args:
        gematria_sum: The gematria value to search for
        db_file: The database file to search in

    Returns:
        A dictionary containing the matched verse information, or None if no match is found
    """
    logger.debug(f"Entering find_shortest_sura_match with gematria_sum: {gematria_sum}")
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()

        # First check if there are any Quran entries in the database
        cursor.execute('''
            SELECT COUNT(*)
            FROM results
            WHERE book != 'Psalms'
        ''')
        count = cursor.fetchone()[0]
        if count == 0:
            logger.warning("No Quran entries found in database. Run initialize_quran_db.py first.")
            return None

        # Search for a match, prioritizing shorter phrases
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND book != 'Psalms'
            ORDER BY phrase_length ASC, LENGTH(words) ASC
            LIMIT 1
        ''', (gematria_sum,))
        result = cursor.fetchone()
        if result:
            logger.debug(f"Shortest sura match found: {result}")
            return {
                "words": result[0],
                "book": result[1],
                "chapter": result[2],
                "verse": result[3],
                "phrase_length": result[4],
                "word_position": result[5] if len(result) > 5 else None
            }

        # If no exact match, try to find the closest match
        # This is similar to how quran_network handles it
        cursor.execute('''
            SELECT gematria_sum, ABS(gematria_sum - ?) as diff
            FROM results
            WHERE book != 'Psalms'
            GROUP BY gematria_sum
            ORDER BY diff ASC
            LIMIT 1
        ''', (gematria_sum,))
        closest = cursor.fetchone()
        if closest:
            closest_gematria = closest[0]
            logger.debug(f"No exact match found. Closest gematria: {closest_gematria}")
            # Find the shortest verse with this gematria
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book != 'Psalms'
                ORDER BY phrase_length ASC, LENGTH(words) ASC
                LIMIT 1
            ''', (closest_gematria,))
            result = cursor.fetchone()
            if result:
                logger.debug(f"Closest sura match found: {result}")
                return {
                    "words": result[0],
                    "book": result[1],
                    "chapter": result[2],
                    "verse": result[3],
                    "phrase_length": result[4],
                    "word_position": result[5] if len(result) > 5 else None
                }

    logger.debug("No matching sura found.")
    return None


def create_quran_display_iframe(sura_name: str, chapter: int, verse: int) -> str:
    """Creates an iframe HTML string for displaying a Quran verse."""
    logger.debug(f"Creating Quran display iframe for sura: {sura_name}, chapter: {chapter}, verse: {verse}")
    # Use surahquran.com URL format
    url = f"https://surahquran.com/aya-{verse}-sora-{chapter}.html"
    iframe = f'<iframe src="{url}" width="800" height="600"></iframe>'
    logger.debug(f"Generated iframe: {iframe}")
    return iframe


def get_sura_count() -> int:
    """Returns the total number of suras in the Quran."""
    base_path = "texts/quran"
    # Count the number of JSON files in the quran directory
    try:
        files = [f for f in os.listdir(base_path) if f.endswith('.json')]
        return len(files)
    except FileNotFoundError:
        logger.error(f"Directory {base_path} not found.")
        return 114  # Default number of suras in the Quran


def get_first_els_result_quran(gematria_sum: int, tlang: str = "en") -> Optional[Dict[str, Any]]:
    """
    Gets the first ELS result from the Quran using the gematria sum as the step.

    Args:
        gematria_sum: The gematria value to use as the ELS step
        tlang: Target language for results

    Returns:
        The first ELS result found, or None
    """
    import hashlib
    from gematria import strip_diacritics

    logger.debug(f"Entering get_first_els_result_quran with gematria_sum: {gematria_sum}, tlang: {tlang}")

    # Create a cache key
    cache_key = f"quran_els_{gematria_sum}_{tlang}"
    cache_file = "els_cache.db"

    # Check cache first
    try:
        with sqlite3.connect(cache_file) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?",
                (hashlib.sha256(cache_key.encode()).hexdigest(),))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for Quran ELS query: {cache_key}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        logger.error(f"Database error checking cache: {e}")

    # Cache miss, perform ELS search
    logger.info(f"Cache miss for Quran ELS query: {cache_key}, performing search")

    # Load all Quran text
    sura_count = get_sura_count()
    quran_data = process_quran_files(1, sura_count)

    # Concatenate all verses from all suras into a single text
    all_text = ""
    for sura_id, sura_info in sorted(quran_data.items()):
        # Add a space between suras to prevent cross-sura word formation
        if all_text:
            all_text += " "
        # Add all verses from this sura
        verses = sura_info['text']
        all_text += " ".join(verses)

    # Clean up the text: strip diacritics, remove any special characters, etc.
    clean_text = strip_diacritics(all_text)
    clean_text = ''.join(c for c in clean_text if c.isalpha() or c.isspace())

    # Perform ELS search with the gematria_sum as the step
    result = None
    if clean_text:
        # Remove spaces for ELS search
        text_no_spaces = clean_text.replace(" ", "")

        # Track character positions to their original sura/verse
        char_map = []  # List of (sura_id, sura_name, verse_idx) for each character

        # Build character position mapping
        current_pos = 0
        for sura_id, sura_info in sorted(quran_data.items()):
            sura_name = sura_info['name']
            verses = sura_info['text']
            for verse_idx, verse in enumerate(verses, 1):
                cleaned_verse = strip_diacritics(verse).replace(" ", "")
                for _ in cleaned_verse:
                    if current_pos < len(text_no_spaces):
                        char_map.append((sura_id, sura_name, verse_idx))
                        current_pos += 1

        # Start positions to try (we'll try the first 100 positions for better coverage)
        for start_pos in range(min(100, len(text_no_spaces))):
            # Extract characters at positions: start_pos, start_pos+step, start_pos+2*step, etc.
            extracted = ""
            positions = []
            pos = start_pos
            # Extract up to 7 characters (typical ELS result length)
            for _ in range(7):
                if pos < len(text_no_spaces):
                    extracted += text_no_spaces[pos]
                    positions.append(pos)
                    pos += gematria_sum
                else:
                    break
            if len(extracted) >= 3:  # At least 3 characters
                # Look up the sura/verse for the first and last character
                first_pos = positions[0]
                last_pos = positions[-1]
                if first_pos < len(char_map) and last_pos < len(char_map):
                    first_loc = char_map[first_pos]
                    last_loc = char_map[last_pos]
                    result = {
                        "result_text": extracted,
                        "source": "Quran",
                        "start_position": start_pos,
                        "step": gematria_sum,
                        "start_sura": first_loc[0],
                        "start_sura_name": first_loc[1],
                        "start_verse": first_loc[2],
                        "end_sura": last_loc[0],
                        "end_sura_name": last_loc[1],
                        "end_verse": last_loc[2],
                        "positions": positions
                    }
                    break  # Found a result, stop searching
                else:
                    logger.warning(f"Character position mapping inconsistency: {first_pos}, {last_pos} vs {len(char_map)}")
                    continue

    # Cache the result
    if result:
        try:
            with sqlite3.connect(cache_file) as conn:
                cursor = conn.cursor()
                # Make sure the table exists
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS els_cache (
                        query_hash TEXT PRIMARY KEY,
                        function_name TEXT,
                        args TEXT,
                        kwargs TEXT,
                        results TEXT
                    )
                ''')
                cursor.execute(
                    "INSERT OR REPLACE INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
                    (hashlib.sha256(cache_key.encode()).hexdigest(), "get_first_els_result_quran",
                     json.dumps([gematria_sum]), json.dumps({"tlang": tlang}), json.dumps(result)))
                conn.commit()
                logger.debug("Cached Quran ELS results in database.")
        except sqlite3.Error as e:
            logger.error(f"Database error caching results: {e}")

    logger.debug(f"Exiting get_first_els_result_quran, returning: {result}")
    return result


def initialize_quran_database(db_file: str = 'abjad.db', max_phrase_length: int = 1):
    """
    Initializes the abjad database with Quran verses.
    This function processes all Quran JSON files and adds their gematria values to the database.

    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length to process
    """
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # Import tqdm for progress bars

    logger.info(f"Initializing Quran database: {db_file}")

    # Create the database if it doesn't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create results table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book TEXT,
                chapter INTEGER,
                verse INTEGER,
                phrase_length INTEGER,
                word_position TEXT,
                PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
            )
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_results_gematria
            ON results (gematria_sum)
        ''')
        # Create processed_books table to track processing
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS processed_books (
                book TEXT PRIMARY KEY,
                max_phrase_length INTEGER
            )
        ''')
        conn.commit()

    # Process all Quran files
    sura_count = get_sura_count()
    logger.info(f"Found {sura_count} suras to process")

    # Global counter for word position tracking
    total_word_count = 0
    book_names = {}

    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Process each sura (book)
        for sura_id in tqdm(range(1, sura_count + 1), desc="Processing Suras"):
            # Load sura data
            sura_data = process_quran_files(sura_id, sura_id)
            if sura_id in sura_data:
                sura_info = sura_data[sura_id]
                sura_name = sura_info['name']
                book_names[sura_id] = sura_name

                # Check if this sura has already been processed
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (sura_name,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping sura {sura_name}: Already processed with max_phrase_length {result[0]}")
                    continue

                verses = sura_info['text']
                phrases_to_insert = []
                for verse_idx, verse_text in enumerate(verses, 1):
                    # Split verse into words
                    words = verse_text.split()
                    # Process phrases of different lengths
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            # Calculate word position range
                            word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            # Add to batch insert list
                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", sura_name, sura_id, verse_idx, length, word_position_range)
                            )
                    # Update total word count after processing each verse
                    total_word_count += len(words)

                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                            INSERT OR IGNORE INTO results
                            (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        # Update processed_books after processing each book
                        cursor.execute('''
                            INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                            VALUES (?, ?)
                        ''', (sura_name, max_phrase_length))
                        conn.commit()
                    except sqlite3.Error as e:
                        logger.error(f"Database error: {e} for sura {sura_id}")
            else:
                logger.warning(f"Sura {sura_id} not found in processed data")

    logger.info("Quran database initialization completed successfully")