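"""Quran text utilities for the gematria toolchain.

Loads sura JSON files from texts/quran/, looks up gematria matches in the
abjad.db SQLite database, performs a simple ELS (equidistant letter
sequence) search over the Quran text, and initializes the database with
per-phrase gematria values.
"""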
import json
import os
import logging
import sqlite3
from typing import Dict, Any, Optional

logger = logging.getLogger(__name__)

def process_quran_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Quran JSON files and returns a dictionary mapping sura IDs to their data.

    Args:
        start: The starting sura ID (inclusive).
        end: The ending sura ID (inclusive).

    Returns:
        A dictionary where keys are sura IDs and values are dictionaries
        containing 'name' and 'text' fields.
    """
    base_path = "texts/quran"
    results = {}

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:03d}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
                if data:
                    # Extract name and verses
                    name = data.get("name", "No title")
                    verses = data.get("verse", {})
                    # Sort verse keys numerically (keys like "verse_1", "verse_2", ...);
                    # plain lexicographic sorting would put "verse_10" before "verse_2".
                    def verse_number(key: str) -> int:
                        digits = ''.join(ch for ch in key if ch.isdigit())
                        return int(digits) if digits else 0

                    text = [verses[key] for key in sorted(verses, key=verse_number)]

                    # Store sura ID as key and sura data as value
                    results[i] = {"name": name, "text": text}

        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")

    return results

def find_shortest_sura_match(gematria_sum: int, db_file: str = 'abjad.db') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest Quran phrase in abjad.db whose gematria matches the
    given sum, falling back to the phrase with the closest gematria sum if
    there is no exact match.

    Args:
        gematria_sum: The gematria value to search for
        db_file: The database file to search in

    Returns:
        A dictionary containing the matched phrase information, or None if
        no match is found
    """
    logger.debug(f"Entering find_shortest_sura_match with gematria_sum: {gematria_sum}")
    
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        
        # First check if there are any Quran entries in the database
        cursor.execute('''
            SELECT COUNT(*) 
            FROM results 
            WHERE book != 'Psalms'
        ''')
        
        count = cursor.fetchone()[0]
        if count == 0:
            logger.warning("No Quran entries found in database. Run initialize_quran_db.py first.")
            return None
            
        # Search for a match, prioritizing shorter phrases
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND book != 'Psalms'
            ORDER BY phrase_length ASC, LENGTH(words) ASC
            LIMIT 1
        ''', (gematria_sum,))
        result = cursor.fetchone()
        
        if result:
            logger.debug(f"Shortest sura match found: {result}")
            return {
                "words": result[0],
                "book": result[1],
                "chapter": result[2],
                "verse": result[3],
                "phrase_length": result[4],
                "word_position": result[5],
            }

        # If no exact match, try to find the closest match
        # This is similar to how quran_network handles it
        cursor.execute('''
            SELECT gematria_sum, ABS(gematria_sum - ?) as diff
            FROM results 
            WHERE book != 'Psalms'
            GROUP BY gematria_sum
            ORDER BY diff ASC
            LIMIT 1
        ''', (gematria_sum,))
        
        closest = cursor.fetchone()
        if closest:
            closest_gematria = closest[0]
            logger.debug(f"No exact match found. Closest gematria: {closest_gematria}")
            
            # Find the shortest verse with this gematria
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book != 'Psalms'
                ORDER BY phrase_length ASC, LENGTH(words) ASC
                LIMIT 1
            ''', (closest_gematria,))
            
            result = cursor.fetchone()
            if result:
                logger.debug(f"Closest sura match found: {result}")
                return {
                    "words": result[0],
                    "book": result[1],
                    "chapter": result[2],
                    "verse": result[3],
                    "phrase_length": result[4],
                    "word_position": result[5],
                }

        logger.debug("No matching sura found.")
        return None

def create_quran_display_iframe(sura_name: str, chapter: int, verse: int) -> str:
    """Creates an iframe HTML string for displaying a Quran verse."""
    logger.debug(f"Creating Quran display iframe for sura: {sura_name}, chapter: {chapter}, verse: {verse}")
    
    # Use surahquran.com URL format
    url = f"https://surahquran.com/aya-{verse}-sora-{chapter}.html"
    iframe = f'<iframe src="{url}" width="800" height="600"></iframe>'
    
    logger.debug(f"Generated iframe: {iframe}")
    return iframe

def get_sura_count() -> int:
    """Returns the total number of suras in the Quran."""
    base_path = "texts/quran"
    
    # Count the number of JSON files in the quran directory
    try:
        files = [f for f in os.listdir(base_path) if f.endswith('.json')]
        return len(files)
    except FileNotFoundError:
        logger.error(f"Directory {base_path} not found.")
        return 114  # Default number of suras in the Quran


def get_first_els_result_quran(gematria_sum: int, tlang: str = "en", rounds_combination: str = "1,-1") -> Optional[Dict[str, Any]]:
    """
    Gets the first ELS result from the Quran using the gematria sum as the step,
    following the same method as Torah ELS: combined +1/-1 rounds.
    
    For Quran, the implementation specifically:
    1. Takes +1 ELS round from the start of book 1 to the end of book 2
    2. Takes -1 ELS round from the end of book 2 to the start of book 1
    
    Args:
        gematria_sum: The gematria value to use as the ELS step
        tlang: Target language for results (currently only used in the cache key)
        rounds_combination: Comma-separated string of round directions, defaults to "1,-1"
        
    Returns:
        The first ELS result found or None
    """
    import hashlib
    from gematria import strip_diacritics, calculate_gematria
    
    logger.debug(f"Entering get_first_els_result_quran with gematria_sum: {gematria_sum}, tlang: {tlang}, rounds_combination: {rounds_combination}")
    
    # Create a cache key including the rounds_combination
    cache_key = f"quran_els_{gematria_sum}_{tlang}_{rounds_combination}"
    cache_file = "els_cache.db"
    
    # Check cache first
    try:
        with sqlite3.connect(cache_file) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?", 
                (hashlib.sha256(cache_key.encode()).hexdigest(),))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for Quran ELS query: {cache_key}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        # On a first run the cache table may not exist yet, which is expected.
        logger.debug(f"Cache lookup skipped: {e}")
    
    # Cache miss, perform ELS search
    logger.info(f"Cache miss for Quran ELS query: {cache_key}, performing search")
    
    # Load Quran text for books (suras) 1 and 2 only, as the method requires
    quran_data = process_quran_files(1, 2)
    
    # Concatenate verses into a single text
    all_text = ""
    sura_verse_map = []  # Track (sura_id, sura_name, verse_idx) for each character
    
    for sura_id, sura_info in sorted(quran_data.items()):
        sura_name = sura_info['name']
        verses = sura_info['text']
        
        # Add a space between suras to prevent cross-sura word formation.
        # Record a map entry for the separator too, so that sura_verse_map
        # stays index-aligned with all_text.
        if all_text:
            all_text += " "
            sura_verse_map.append((sura_id, sura_name, 1))

        # Add all verses from this sura and track the mapping
        all_text += " ".join(verses)

        # Track each character position back to its sura/verse for later lookup
        for verse_idx, verse in enumerate(verses, 1):
            # +1 accounts for the joining space after every verse except the last
            for _ in range(len(verse) + (1 if verse_idx < len(verses) else 0)):
                sura_verse_map.append((sura_id, sura_name, verse_idx))
    
    # Clean up the text: strip diacritics, remove special characters
    clean_text = strip_diacritics(all_text)
    clean_text = ''.join(c for c in clean_text if c.isalpha() or c.isspace())
    
    # Remove spaces for ELS search
    text_no_spaces = clean_text.replace(" ", "")
    text_length = len(text_no_spaces)
    
    if text_length == 0:
        logger.warning("No text available after cleaning")
        return None

    if gematria_sum <= 0:
        logger.warning(f"Invalid ELS step: {gematria_sum} (must be a positive integer)")
        return None
    
    # Map each letter of text_no_spaces back to a sura/verse. Indexing
    # sura_verse_map by the clean_text position is approximate when
    # strip_diacritics shortens the string, so guard against running
    # past the end of the map.
    char_map = []
    for i, c in enumerate(clean_text):
        if c.isalpha():
            if i < len(sura_verse_map):
                char_map.append(sura_verse_map[i])
    
    # Parse rounds combination - default is "1,-1"
    rounds_list = list(map(float, rounds_combination.split(',')))
    
    result = None
    complete_result = ""
    complete_positions = []
    first_position = None
    last_position = None
    
    # Process each round direction (similar to Torah ELS)
    for round_dir in rounds_list:
        # Determine if this is a forward or backward search
        is_forward = round_dir > 0
        start_index = 0 if is_forward else (text_length - 1)
        
        # Set step and direction
        step = gematria_sum
        direction = 1 if is_forward else -1
        
        # Extract ELS characters
        round_text = ""
        positions = []
        pos = start_index
        
        # Extract up to 10 characters, but we'll use at least 3 for a valid result
        for _ in range(10):
            if 0 <= pos < text_length:
                round_text += text_no_spaces[pos]
                positions.append(pos)
                pos += direction * step
            else:
                break
        
        if len(round_text) >= 3:
            # Save this round's results
            complete_result += round_text
            complete_positions.extend(positions)
            
            # Track first and last positions for the overall result
            # Track the overall first and last positions; the sura/verse
            # locations are resolved once, after all rounds are processed.
            if first_position is None or (is_forward and positions[0] < first_position):
                first_position = positions[0]

            if last_position is None or (not is_forward and positions[-1] > last_position):
                last_position = positions[-1]
    
    # Create result if we found something
    if complete_result and len(complete_result) >= 3 and first_position is not None and last_position is not None:
        if first_position < len(char_map) and last_position < len(char_map):
            first_loc = char_map[first_position]
            last_loc = char_map[last_position]
            
            result = {
                "result_text": complete_result,
                "source": "Quran",
                "step": gematria_sum,
                "start_sura": first_loc[0],
                "start_sura_name": first_loc[1],
                "start_verse": first_loc[2],
                "end_sura": last_loc[0],
                "end_sura_name": last_loc[1],
                "end_verse": last_loc[2],
                "positions": complete_positions,
                "rounds_combination": rounds_combination
            }
            
            # Calculate gematria of the result text
            result["result_sum"] = calculate_gematria(complete_result)
            
            logger.debug(f"Found ELS result: {complete_result} with gematria {result['result_sum']}")
        else:
            logger.warning(f"Character position mapping inconsistency: {first_position}, {last_position} vs {len(char_map)}")
    
    # Cache the result
    if result:
        try:
            with sqlite3.connect(cache_file) as conn:
                cursor = conn.cursor()
                
                # Make sure the table exists
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS els_cache (
                        query_hash TEXT PRIMARY KEY,
                        function_name TEXT,
                        args TEXT,
                        kwargs TEXT,
                        results TEXT
                    )
                ''')
                
                cursor.execute(
                    "INSERT OR REPLACE INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
                    (hashlib.sha256(cache_key.encode()).hexdigest(), "get_first_els_result_quran", 
                     json.dumps([gematria_sum]), json.dumps({"tlang": tlang, "rounds_combination": rounds_combination}), json.dumps(result)))
                conn.commit()
                logger.debug("Cached Quran ELS results in database.")
        except sqlite3.Error as e:
            logger.error(f"Database error caching results: {e}")
    
    logger.debug(f"Exiting get_first_els_result_quran, returning: {result}")
    return result
        
def initialize_quran_database(db_file: str = 'abjad.db', max_phrase_length: int = 1) -> None:
    """
    Initializes the abjad database with Quran verses.
    This function processes all Quran JSON files and adds their gematria values to the database.
    
    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length to process
    """
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # Import tqdm for progress bars
    
    logger.info(f"Initializing Quran database: {db_file}")
    
    # Create the database if it doesn't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create results table
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            phrase_length INTEGER,
            word_position TEXT,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
        )
        ''')
        
        cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_gematria
        ON results (gematria_sum)
        ''')
        
        # Create processed_books table to track processing
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
        ''')
        
        conn.commit()
    
    # Process all Quran files
    sura_count = get_sura_count()
    logger.info(f"Found {sura_count} suras to process")
    
    # Global counter for word position tracking
    total_word_count = 0
    
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        
        # Process each sura (book)
        for sura_id in tqdm(range(1, sura_count + 1), desc="Processing Suras"):
            # Load sura data
            sura_data = process_quran_files(sura_id, sura_id)
            
            if sura_id in sura_data:
                sura_info = sura_data[sura_id]
                sura_name = sura_info['name']
                
                verses = sura_info['text']

                # Check if this sura has already been processed
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (sura_name,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping sura {sura_name}: Already processed with max_phrase_length {result[0]}")
                    # Keep the global word counter consistent so that the
                    # word_position ranges of later suras match a full run.
                    total_word_count += sum(len(v.split()) for v in verses)
                    continue

                phrases_to_insert = []
                
                for verse_idx, verse_text in enumerate(verses, 1):
                    # Split verse into words
                    words = verse_text.split()
                    
                    # Process phrases of different lengths
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            
                            # Calculate word position range
                            word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            
                            # Add to batch insert list
                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", sura_name, sura_id, verse_idx, length, word_position_range)
                            )
                    
                    # Update total word count after processing each verse
                    total_word_count += len(words)
                    
                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                        INSERT OR IGNORE INTO results 
                        (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        
                        # Update processed_books after processing each book
                        cursor.execute('''
                        INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                        VALUES (?, ?)
                        ''', (sura_name, max_phrase_length))
                        
                        conn.commit()
                    except sqlite3.Error as e:
                        logger.error(f"Database error: {e} for sura {sura_id}")
            else:
                logger.warning(f"Sura {sura_id} not found in processed data")
    
    logger.info("Quran database initialization completed successfully")